一,什么是kni,为什么要有kni

Kni(Kernel NIC Interface)内核网卡接口,是DPDK允许用户态和内核态交换报文的解决方案,它模拟了一个虚拟的网口,提供dpdk的应用程序和linux内核之间的通讯。kni接口允许报文从用户态接收后转发到linux协议栈去。

为什么要弄一个kni接口,虽然dpdk的高速转发性能很出色,但是也有自己的一些缺点,比如没有协议栈就是其中一项缺陷,当然也可能当时设计时就没有将协议栈考虑进去,毕竟协议栈需要对报文做转发处理,可能会使

处理报文的能力大大降低。

直接上图:

上图是kni的mbuf使用流程图,也可以看出报文的流向,因为报文在代码中其实就是一个个内存指针。其中rx_q右边是用户态,左边是内核态。最后通过调用netif_rx()将报文送入linux协议栈,这其中需要将dpdk的mbuf转换成skb_buf。

当linux向kni端口发送报文时,调用回调函数kni_net_tx(),然后报文经过转换之后发送到端口上。

二:主要代码分析:

1,和igb uio模块一样,kni模块分成内核以及用户态代码,内核模块在编译出来之后为rte_kni.ko,首先插入内核,dpdk提供了一个用户态的例子。首先看下kni内核模块代码:

在kni_misc.c中,ko代码入口为

module_init(kni_init);

可以看到函数从kni_init进入:

 static int __init
kni_init(void)
{
int rc; KNI_PRINT("######## DPDK kni module loading ########\n"); if (kni_parse_kthread_mode() < ) { //kni的线程模式、单线程还是多线程
KNI_ERR("Invalid parameter for kthread_mode\n");
return -EINVAL;
} #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
#else
rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
if (rc)
return -EPERM; rc = misc_register(&kni_misc);
if (rc != ) {
KNI_ERR("Misc registration failed\n");
goto out;
} /* Configure the lo mode according to the input parameter */
kni_net_config_lo_mode(lo_mode); KNI_PRINT("######## DPDK kni module loaded ########\n"); return ; out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
unregister_pernet_subsys(&kni_net_ops);
#else
register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
return rc;
}

代码比较简单,首先选择kni的线程模式,分为单线程和多线程,所谓单线程是指所有的kni端口收发都由一个线程守护,多线程是指每一个kni端口由一个线程守护,这部分是在插入模块时带入参数选择。

接着调用注册函数misc_register,将kni注册为一个混杂设备。其中kni_misc结构体里面定义了该混杂设备的一些操作

 static struct miscdevice kni_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = KNI_DEVICE,
.fops = &kni_fops,
};

这里主要看.fops里面的结构体

 static struct file_operations kni_fops = {
.owner = THIS_MODULE,
.open = kni_open,
.release = kni_release,
.unlocked_ioctl = (void *)kni_ioctl,
.compat_ioctl = (void *)kni_compat_ioctl,
};

这里涉及的主要操作有kni_open,kni_release,以及kni_ioctl,分别对应几个函数

 static int
kni_open(struct inode *inode, struct file *file)
{
struct net *net = current->nsproxy->net_ns;
struct kni_net *knet = net_generic(net, kni_net_id); /* kni device can be opened by one user only per netns */
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY; /* Create kernel thread for single mode */
if (multiple_kthread_on == ) {
KNI_PRINT("Single kernel thread for all KNI devices\n");
/* Create kernel thread for RX */
knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
"kni_single");
if (IS_ERR(knet->kni_kthread)) {
KNI_ERR("Unable to create kernel threaed\n");
return PTR_ERR(knet->kni_kthread);
}
} else
KNI_PRINT("Multiple kernel thread mode enabled\n"); file->private_data = get_net(net);
KNI_PRINT("/dev/kni opened\n"); return ;
}

kni_open时如果是单线程模式则会创建一个内核线程,并打开/dev/kni,这个时候在host的/dev下能看到kni设备文件

 static int
kni_ioctl(struct inode *inode,
unsigned int ioctl_num,
unsigned long ioctl_param)
{
int ret = -EINVAL;
struct net *net = current->nsproxy->net_ns; KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); /*
* Switch according to the ioctl called
*/
switch (_IOC_NR(ioctl_num)) {
case _IOC_NR(RTE_KNI_IOCTL_TEST):
/* For test only, not used */
break;
case _IOC_NR(RTE_KNI_IOCTL_CREATE):
ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
break;
case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
break;
default:
KNI_DBG("IOCTL default\n");
break;
} return ret;
}

kni_ioctl函数是与用户态通信的一个接口,主要的是kni_ioctl_create函数:

 static int
kni_ioctl_create(struct net *net,
unsigned int ioctl_num, unsigned long ioctl_param)
{
struct kni_net *knet = net_generic(net, kni_net_id);
int ret;
struct rte_kni_device_info dev_info;
struct pci_dev *pci = NULL;
struct pci_dev *found_pci = NULL;
struct net_device *net_dev = NULL;
struct net_device *lad_dev = NULL;
struct kni_dev *kni, *dev, *n; printk(KERN_INFO "KNI: Creating kni...\n");
/* Check the buffer size, to avoid warning */
if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
return -EINVAL; /* Copy kni info from user space */
ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
if (ret) {
KNI_ERR("copy_from_user in kni_ioctl_create");
return -EIO;
} /**
* Check if the cpu core id is valid for binding,
* for multiple kernel thread mode.
*/
if (multiple_kthread_on && dev_info.force_bind &&
!cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
} /* Check if it has been created */
down_read(&knet->kni_list_lock);
list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
if (kni_check_param(dev, &dev_info) < ) {
up_read(&knet->kni_list_lock);
return -EINVAL;
}
}
up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_UNKNOWN
NET_NAME_UNKNOWN,
#endif
kni_net_init);
if (net_dev == NULL) {
KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
return -EBUSY;
} dev_net_set(net_dev, net); kni = netdev_priv(net_dev); kni->net_dev = net_dev;
kni->group_id = dev_info.group_id;
kni->core_id = dev_info.core_id;
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); /* Translate user space info into kernel space info */
kni->tx_q = phys_to_virt(dev_info.tx_phys);
kni->rx_q = phys_to_virt(dev_info.rx_phys);
kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
kni->free_q = phys_to_virt(dev_info.free_phys); kni->req_q = phys_to_virt(dev_info.req_phys);
kni->resp_q = phys_to_virt(dev_info.resp_phys);
kni->sync_va = dev_info.sync_va;
kni->sync_kva = phys_to_virt(dev_info.sync_phys); kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
kni->mbuf_va = dev_info.mbuf_va; #ifdef RTE_KNI_VHOST
kni->vhost_queue = NULL;
kni->vq_status = BE_STOP;
#endif
kni->mbuf_size = dev_info.mbuf_size; KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
(unsigned long long) dev_info.tx_phys, kni->tx_q);
KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
(unsigned long long) dev_info.rx_phys, kni->rx_q);
KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n",
(unsigned long long) dev_info.free_phys, kni->free_q);
KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n",
(unsigned long long) dev_info.req_phys, kni->req_q);
KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
(unsigned long long) dev_info.resp_phys, kni->resp_q);
KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n",
(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va);
KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
dev_info.bus,
dev_info.devid,
dev_info.function,
dev_info.vendor_id,
dev_info.device_id); pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */
while (pci) {
KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
pci->bus->number,
PCI_SLOT(pci->devfn),
PCI_FUNC(pci->devfn)); if ((pci->bus->number == dev_info.bus) &&
(PCI_SLOT(pci->devfn) == dev_info.devid) &&
(PCI_FUNC(pci->devfn) == dev_info.function)) {
found_pci = pci;
switch (dev_info.device_id) {
#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
#include <rte_pci_dev_ids.h>
ret = igb_kni_probe(found_pci, &lad_dev);
break;
#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
case (dev):
#include <rte_pci_dev_ids.h>
ret = ixgbe_kni_probe(found_pci, &lad_dev);
break;
default:
ret = -;
break;
} KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
pci, lad_dev);
if (ret == ) {
kni->lad_dev = lad_dev;
kni_set_ethtool_ops(kni->net_dev);
} else {
KNI_ERR("Device not supported by ethtool");
kni->lad_dev = NULL;
} kni->pci_dev = found_pci;
kni->device_id = dev_info.device_id;
break;
}
pci = pci_get_device(dev_info.vendor_id,
dev_info.device_id, pci);
}
if (pci)
pci_dev_put(pci); if (kni->lad_dev)
memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
else
/*
* Generate random mac address. eth_random_addr() is the newer
* version of generating mac address in linux kernel.
*/
//random_ether_addr(net_dev->dev_addr);
memcpy(net_dev->dev_addr, &dev_info.kni_mac,ETH_ALEN); ret = register_netdev(net_dev);
if (ret) {
KNI_ERR("error %i registering device \"%s\"\n",
ret, dev_info.name);
kni_dev_remove(kni);
return -ENODEV;
} #ifdef RTE_KNI_VHOST
kni_vhost_init(kni);
#endif /**
* Create a new kernel thread for multiple mode, set its core affinity,
* and finally wake it up.
*/
if (multiple_kthread_on) {
kni->pthread = kthread_create(kni_thread_multiple,
(void *)kni,
"kni_%s", kni->name);
if (IS_ERR(kni->pthread)) {
kni_dev_remove(kni);
return -ECANCELED;
}
if (dev_info.force_bind)
kthread_bind(kni->pthread, kni->core_id);
wake_up_process(kni->pthread);
} down_write(&knet->kni_list_lock);
list_add(&kni->list, &knet->kni_list_head);
up_write(&knet->kni_list_lock); return ;
}

ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));这条语句会拷贝从用户态传过来的消息,dev_info主要存放了虚拟kni网口的相关参数,接下来就会根据dev_info中的参数注册一个kni网口ret = register_netdev(net_dev);

这个函数完成创建,这样就虚拟出一个网口出来。其中165行是自己修改的,因为我发现按照文档提供的方法根本不能ping通报文,我将生成kni的mac地址修改成dpdk接管的网口mac即可ping通。原生态代码是随机生成一个mac。

2,用户态代码主要分析dpdk提供的example,

 int
main(int argc, char** argv)
{
int ret;
uint8_t nb_sys_ports, port;
unsigned i; /* Associate signal_hanlder function with USR signals */
signal(SIGUSR1, signal_handler);
signal(SIGUSR2, signal_handler);
signal(SIGRTMIN, signal_handler);
signal(SIGINT, signal_handler); /* Initialise EAL */
ret = rte_eal_init(argc, argv);
if (ret < )
rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret);
argc -= ret;
argv += ret; /* Parse application arguments (after the EAL ones) */
ret = parse_args(argc, argv);
if (ret < )
rte_exit(EXIT_FAILURE, "Could not parse input parameters\n"); /* Create the mbuf pool */
pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
MEMPOOL_CACHE_SZ, , MBUF_DATA_SZ, rte_socket_id());
if (pktmbuf_pool == NULL) {
rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
return -;
} /* Get number of ports found in scan */
nb_sys_ports = rte_eth_dev_count();
if (nb_sys_ports == )
rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n"); /* Check if the configured port ID is valid */
for (i = ; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i] && i >= nb_sys_ports)
rte_exit(EXIT_FAILURE, "Configured invalid "
"port ID %u\n", i); /* Initialize KNI subsystem */
init_kni(); /* Initialise each port */
for (port = ; port < nb_sys_ports; port++) {
/* Skip ports that are not enabled */
if (!(ports_mask & ( << port)))
continue;
init_port(port); if (port >= RTE_MAX_ETHPORTS)
rte_exit(EXIT_FAILURE, "Can not use more than "
"%d ports for kni\n", RTE_MAX_ETHPORTS); kni_alloc(port);
}
check_all_ports_link_status(nb_sys_ports, ports_mask); /* Launch per-lcore function on every lcore */
rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
RTE_LCORE_FOREACH_SLAVE(i) {
if (rte_eal_wait_lcore(i) < )
return -;
} /* Release resources */
for (port = ; port < nb_sys_ports; port++) {
if (!(ports_mask & ( << port)))
continue;
kni_free_kni(port);
}
#ifdef RTE_LIBRTE_XEN_DOM0
rte_kni_close();
#endif
for (i = ; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i]) {
rte_free(kni_port_params_array[i]);
kni_port_params_array[i] = NULL;
} return ;
}

main函数进来进行一些eal的初始化,随后创建一个pktmbuf_pool,重点看一下init_kni();以及kni_alloc(port);rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);函数。其中init_kni()函数是初始化kni子系统

 static void
init_kni(void)
{
unsigned int num_of_kni_ports = , i;
struct kni_port_params **params = kni_port_params_array; /* Calculate the maximum number of KNI interfaces that will be used */
for (i = ; i < RTE_MAX_ETHPORTS; i++) {
if (kni_port_params_array[i]) {
num_of_kni_ports += (params[i]->nb_lcore_k ?
params[i]->nb_lcore_k : );
}
} /* Invoke rte KNI init to preallocate the ports */
rte_kni_init(num_of_kni_ports);
}

主要代码在rte_kni_init里面

 void
rte_kni_init(unsigned int max_kni_ifaces)
{
uint32_t i;
struct rte_kni_memzone_slot *it;
const struct rte_memzone *mz;
#define OBJNAMSIZ 32
char obj_name[OBJNAMSIZ];
char mz_name[RTE_MEMZONE_NAMESIZE]; /* Immediately return if KNI is already initialized */
if (kni_memzone_pool.initialized) {
RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
return;
} if (max_kni_ifaces == ) {
RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
max_kni_ifaces);
rte_panic("Unable to initialize KNI\n");
} /* Check FD and open */
if (kni_fd < ) {
kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
if (kni_fd < )
rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
} /* Allocate slot objects */
kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
rte_malloc(NULL,
sizeof(struct rte_kni_memzone_slot) *
max_kni_ifaces,
);
KNI_MEM_CHECK(kni_memzone_pool.slots == NULL); /* Initialize general pool variables */
kni_memzone_pool.initialized = ;
kni_memzone_pool.max_ifaces = max_kni_ifaces;
kni_memzone_pool.free = &kni_memzone_pool.slots[];
rte_spinlock_init(&kni_memzone_pool.mutex); /* Pre-allocate all memzones of all the slots; panic on error */
for (i = ; i < max_kni_ifaces; i++) { /* Recover current slot */
it = &kni_memzone_pool.slots[i];
it->id = i; /* Allocate KNI context */
snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_ctx = mz; /* TX RING */
snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_tx_q = mz; /* RX RING */
snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_rx_q = mz; /* ALLOC RING */
snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_alloc_q = mz; /* FREE RING */
snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_free_q = mz; /* Request RING */
snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_req_q = mz; /* Response RING */
snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_resp_q = mz; /* Req/Resp sync mem area */
snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_sync_addr = mz; if ((i+) == max_kni_ifaces) {
it->next = NULL;
kni_memzone_pool.free_tail = it;
} else
it->next = &kni_memzone_pool.slots[i+];
} return; kni_fail:
rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
max_kni_ifaces);
}

对上图中所有的fifo分配内存。

 static int
kni_alloc(uint8_t port_id)
{
uint8_t i;
struct rte_kni *kni;
struct rte_kni_conf conf;
struct kni_port_params **params = kni_port_params_array; if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
return -; params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
params[port_id]->nb_lcore_k : ; for (i = ; i < params[port_id]->nb_kni; i++) {
/* Clear conf at first */
memset(&conf, , sizeof(conf));
if (params[port_id]->nb_lcore_k) {
snprintf(conf.name, RTE_KNI_NAMESIZE,
"vEth%u_%u", port_id, i);
conf.core_id = params[port_id]->lcore_k[i];
conf.force_bind = ;
} else
snprintf(conf.name, RTE_KNI_NAMESIZE,
"vEth%u", port_id);
conf.group_id = (uint16_t)port_id;
conf.mbuf_size = MAX_PACKET_SZ;
rte_eth_macaddr_get(port_id, (struct ether_addr *)&conf.kni_mac);
/*
* The first KNI device associated to a port
* is the master, for multiple kernel thread
* environment.
*/
if (i == ) {
struct rte_kni_ops ops;
struct rte_eth_dev_info dev_info; memset(&dev_info, , sizeof(dev_info));
rte_eth_dev_info_get(port_id, &dev_info);
conf.addr = dev_info.pci_dev->addr;
conf.id = dev_info.pci_dev->id; memset(&ops, , sizeof(ops));
ops.port_id = port_id;
ops.change_mtu = kni_change_mtu;
ops.config_network_if = kni_config_network_interface; kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
} else
kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL); if (!kni)
rte_exit(EXIT_FAILURE, "Fail to create kni for "
"port: %d\n", port_id);
params[port_id]->kni[i] = kni;
} return ;
}
 struct rte_kni *
rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
const struct rte_kni_conf *conf,
struct rte_kni_ops *ops)
{
int ret;
struct rte_kni_device_info dev_info;
struct rte_kni *ctx;
char intf_name[RTE_KNI_NAMESIZE];
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
const struct rte_mempool *mp;
struct rte_kni_memzone_slot *slot = NULL; if (!pktmbuf_pool || !conf || !conf->name[])
return NULL; /* Check if KNI subsystem has been initialized */
if (kni_memzone_pool.initialized != ) {
RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
return NULL;
} /* Get an available slot from the pool */
slot = kni_memzone_pool_alloc();
if (!slot) {
RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.\n",
kni_memzone_pool.max_ifaces);
return NULL;
} /* Recover ctx */
ctx = slot->m_ctx->addr;
snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name); if (ctx->in_use) {
RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
return NULL;
}
memset(ctx, , sizeof(struct rte_kni));
if (ops)
memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops)); memset(&dev_info, , sizeof(dev_info));
dev_info.bus = conf->addr.bus;
dev_info.devid = conf->addr.devid;
dev_info.function = conf->addr.function;
dev_info.vendor_id = conf->id.vendor_id;
dev_info.device_id = conf->id.device_id;
dev_info.core_id = conf->core_id;
dev_info.force_bind = conf->force_bind;
dev_info.group_id = conf->group_id;
dev_info.mbuf_size = conf->mbuf_size; snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name); RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
dev_info.bus, dev_info.devid, dev_info.function,
dev_info.vendor_id, dev_info.device_id);
/* TX RING */
mz = slot->m_tx_q;
ctx->tx_q = mz->addr;
kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
dev_info.tx_phys = mz->phys_addr; /* RX RING */
mz = slot->m_rx_q;
ctx->rx_q = mz->addr;
kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
dev_info.rx_phys = mz->phys_addr; /* ALLOC RING */
mz = slot->m_alloc_q;
ctx->alloc_q = mz->addr;
kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
dev_info.alloc_phys = mz->phys_addr; /* FREE RING */
mz = slot->m_free_q;
ctx->free_q = mz->addr;
kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
dev_info.free_phys = mz->phys_addr; /* Request RING */
mz = slot->m_req_q;
ctx->req_q = mz->addr;
kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
dev_info.req_phys = mz->phys_addr; /* Response RING */
mz = slot->m_resp_q;
ctx->resp_q = mz->addr;
kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
dev_info.resp_phys = mz->phys_addr; /* Req/Resp sync mem area */
mz = slot->m_sync_addr;
ctx->sync_addr = mz->addr;
dev_info.sync_va = mz->addr;
dev_info.sync_phys = mz->phys_addr; /* MBUF mempool */
snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
pktmbuf_pool->name);
mz = rte_memzone_lookup(mz_name);
KNI_MEM_CHECK(mz == NULL);
mp = (struct rte_mempool *)mz->addr;
/* KNI currently requires to have only one memory chunk */
if (mp->nb_mem_chunks != )
goto kni_fail; dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
ctx->pktmbuf_pool = pktmbuf_pool;
ctx->group_id = conf->group_id;
ctx->slot_id = slot->id;
ctx->mbuf_size = conf->mbuf_size; dev_info.kni_mac = conf->kni_mac; ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
KNI_MEM_CHECK(ret < ); ctx->in_use = ; /* Allocate mbufs and then put them into alloc_q */
kni_allocate_mbufs(ctx); return ctx; kni_fail:
if (slot)
kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]); return NULL;
}

其中ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);就是将dev_info传入内核。

 static int
main_loop(__rte_unused void *arg)
{
uint8_t i, nb_ports = rte_eth_dev_count();
int32_t f_stop;
const unsigned lcore_id = rte_lcore_id();
enum lcore_rxtx {
LCORE_NONE,
LCORE_RX,
LCORE_TX,
LCORE_MAX
};
enum lcore_rxtx flag = LCORE_NONE; for (i = ; i < nb_ports; i++) {
if (!kni_port_params_array[i])
continue;
if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
flag = LCORE_RX;
break;
} else if (kni_port_params_array[i]->lcore_tx ==
(uint8_t)lcore_id) {
flag = LCORE_TX;
break;
}
} if (flag == LCORE_RX) {
RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n",
kni_port_params_array[i]->lcore_rx,
kni_port_params_array[i]->port_id);
while () {
f_stop = rte_atomic32_read(&kni_stop);
if (f_stop)
break;
kni_ingress(kni_port_params_array[i]);
}
} else if (flag == LCORE_TX) {
RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
kni_port_params_array[i]->lcore_tx,
kni_port_params_array[i]->port_id);
while () {
f_stop = rte_atomic32_read(&kni_stop);
if (f_stop)
break;
kni_egress(kni_port_params_array[i]);
}
} else
RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id); return ;
}

进入循环收发包,

 static void
kni_ingress(struct kni_port_params *p)
{
uint8_t i, port_id;
unsigned nb_rx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; if (p == NULL)
return; nb_kni = p->nb_kni;
port_id = p->port_id;
for (i = ; i < nb_kni; i++) {
/* Burst rx from eth */
nb_rx = rte_eth_rx_burst(port_id, , pkts_burst, PKT_BURST_SZ);
if (unlikely(nb_rx > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from eth\n");
return;
}
/* Burst tx to kni */
num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
kni_stats[port_id].rx_packets += num;
//if(kni_stats[port_id].rx_packets != 0 && kni_stats[port_id].rx_packets%20 == 0 && num > 0)
// printf("recv packet num : %"PRIu64"\n",kni_stats[port_id].rx_packets);
rte_kni_handle_request(p->kni[i]);
if (unlikely(num < nb_rx)) {
/* Free mbufs not tx to kni interface */
kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
kni_stats[port_id].rx_dropped += nb_rx - num;
}
}
}
 static void
kni_egress(struct kni_port_params *p)
{
uint8_t i, port_id;
unsigned nb_tx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; if (p == NULL)
return; nb_kni = p->nb_kni;
port_id = p->port_id;
for (i = ; i < nb_kni; i++) {
/* Burst rx from kni */
num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
if (unlikely(num > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from KNI\n");
return;
}
/* Burst tx to eth */
nb_tx = rte_eth_tx_burst(port_id, , pkts_burst, (uint16_t)num);
kni_stats[port_id].tx_packets += nb_tx;
if (unlikely(nb_tx < num)) {
/* Free mbufs not tx to NIC */
kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
kni_stats[port_id].tx_dropped += num - nb_tx;
}
}
}

代码就守护在这个kni网口进行收发包。篇幅有限,后面再整理。

dpdk中kni模块的更多相关文章

  1. dpdk中log的使用方法

    1 log简介    dpdk中通过log系统记录相关的日志信息,每一条日志除日志内容外,还有两个附加信息,log级别和log类型.开发人员可根据级别和类型对日志信息进行过滤,只记录必要的日志.1.1 ...

  2. 隐藏进程中的模块绕过IceSword的检测

    标 题: [原创] 隐藏进程中的模块绕过IceSword的检测 作 者: xPLK 时 间: 2008-06-19,17:59:11 链 接: http://bbs.pediy.com/showthr ...

  3. 浅析JS中的模块规范(CommonJS,AMD,CMD)////////////////////////zzzzzz

    浅析JS中的模块规范(CommonJS,AMD,CMD)   如果你听过js模块化这个东西,那么你就应该听过或CommonJS或AMD甚至是CMD这些规范咯,我也听过,但之前也真的是听听而已.     ...

  4. 解决centos7中python-pip模块不存在的问题

    centos 7中python-pip模块不存在,是因为像centos这类衍生的发行版,源跟新滞后,或者不存在.即使使用yum去search python-pip也找不到软件包. 为了使用安装滞后或源 ...

  5. Nodejs中cluster模块的多进程共享数据问题

    Nodejs中cluster模块的多进程共享数据问题 前述 nodejs在v0.6.x之后增加了一个模块cluster用于实现多进程,利用child_process模块来创建和管理进程,增加程序在多核 ...

  6. Python中optionParser模块的使用方法[转]

    本文以实例形式较为详尽的讲述了Python中optionParser模块的使用方法,对于深入学习Python有很好的借鉴价值.分享给大家供大家参考之用.具体分析如下: 一般来说,Python中有两个内 ...

  7. python中threading模块详解(一)

    python中threading模块详解(一) 来源 http://blog.chinaunix.net/uid-27571599-id-3484048.html threading提供了一个比thr ...

  8. Python中的模块与包

    标准库的安装路径 在import模块的时候,python是通过系统路径找到这些模块的,我们可以将这些路径打印出来: >>> pprint.pprint(sys.path) ['', ...

  9. 【OpenCV】OpenCV中GPU模块使用

    CUDA基本使用方法 在介绍OpenCV中GPU模块使用之前,先回顾下CUDA的一般使用方法,其基本步骤如下: 1.主机代码执行:2.传输数据到GPU:3.确定grid,block大小: 4.调用内核 ...

随机推荐

  1. bzoj 4806 炮

    Written with StackEdit. Description 众所周知,双炮叠叠将是中国象棋中很厉害的一招必杀技.炮吃子时必须隔一个棋子跳吃,即俗称"炮打隔子". 炮跟炮 ...

  2. ArcGIS Runtime SDK for WPF之测量距离和面积

    bu不多说,上代码 using System.Windows; using ESRI.ArcGIS.Client; using ESRI.ArcGIS.Client.Tasks; using ESRI ...

  3. 【sqlite】基础知识

    最近做一个数控系统的项目,winCE嵌入式操作系统+.Net Compact Framework环境+VS2008开发平台,开发的设备程序部署到winCE系统下的设备中运行.. 个年头,SQLite也 ...

  4. 学习动态性能表(3)--v$sql&v$sql_plan

    学习动态性能表 第三篇-(1)-v$sq 2007.5.25 V$SQL中存储具体的SQL语句. 一条语句可以映射多个cursor,因为对象所指的cursor可以有不同用户(如例1).如果有多个cur ...

  5. ubuntu下网卡eth1如何修改为eth0

    正常来说,Linux在识别网卡时第一张会是eth0,第二张才是eth1.有时候我们使用虚拟机克隆技术后网卡的信息就会改变,新克隆出来的虚拟主机网卡名字可能变为eth1.无论我们怎么修改都无法改变,这就 ...

  6. jquery 实现点击颜色切换

    <!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content ...

  7. DSP SYS/BIOS开发

    https://blog.csdn.net/lg1259156776/article/details/80695318

  8. LevelDB Cache实现机制分析

    几天前淘宝量子恒道在博客上分析了HBase的Cache机制,本篇文章,结合LevelDB 1.7.0版本的源码,分析下LevelDB的Cache机制. 概述 LevelDB是Google开源的持久化K ...

  9. 第14 章 Spring MVC的工作机制与设计模式

    14.1 Spring MVC的总体设计 要使用SPring MVC,只要在web.xml中配置一个DispatcherServlet. 再定义一个dispatcherServlet-servlet. ...

  10. MySQL 性能测试经验

    一.背景 近期在进行资源调度管理平台的重构工作,其中的Resource/Property数据库设计,在没有更加优化的方案前,打算沿用当前平台的数据库结构:这就需要对当前平台的数据库结构进行剖析研究,并 ...