一,什么是kni,为什么要有kni

Kni(Kernel NIC Interface)内核网卡接口,是DPDK允许用户态和内核态交换报文的解决方案,它模拟了一个虚拟的网口,提供dpdk的应用程序和linux内核之间的通讯。kni接口允许报文从用户态接收后转发到linux协议栈去。

为什么要弄一个kni接口,虽然dpdk的高速转发性能很出色,但是也有自己的一些缺点,比如没有协议栈就是其中一项缺陷,当然也可能当时设计时就没有将协议栈考虑进去,毕竟协议栈需要对报文做转发处理,可能会使

处理报文的能力大大降低。

直接上图:

上图是kni的mbuf使用流程图,也可以看出报文的流向,因为报文在代码中其实就是一个个内存指针。其中rx_q右边是用户态,左边是内核态。最后通过调用netif_rx()将报文送入linux协议栈,这其中需要将dpdk的mbuf转换成skb_buf。

当linux向kni端口发送报文时,调用回调函数kni_net_tx(),然后报文经过转换之后发送到端口上。

二:主要代码分析:

1,和igb uio模块一样,kni模块分成内核以及用户态代码,内核模块在编译出来之后为rte_kni.ko,首先插入内核,dpdk提供了一个用户态的例子。首先看下kni内核模块代码:

在kni_misc.c中,ko代码入口为

module_init(kni_init);

可以看到函数从kni_init进入:

 static int __init
kni_init(void)
{
int rc; KNI_PRINT("######## DPDK kni module loading ########\n"); if (kni_parse_kthread_mode() < ) { //kni的线程模式、单线程还是多线程
KNI_ERR("Invalid parameter for kthread_mode\n");
return -EINVAL;
} #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
#else
rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
if (rc)
return -EPERM; rc = misc_register(&kni_misc);
if (rc != ) {
KNI_ERR("Misc registration failed\n");
goto out;
} /* Configure the lo mode according to the input parameter */
kni_net_config_lo_mode(lo_mode); KNI_PRINT("######## DPDK kni module loaded ########\n"); return ; out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
unregister_pernet_subsys(&kni_net_ops);
#else
register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
return rc;
}

代码比较简单,首先选择kni的线程模式,分为单线程和多线程,所谓单线程是指所有的kni端口收发都由一个线程守护,多线程是指每一个kni端口由一个线程守护,这部分是在插入模块时带入参数选择。

接着调用注册函数misc_register,将kni注册为一个混杂设备。其中kni_misc结构体里面定义了该混杂设备的一些操作

 static struct miscdevice kni_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = KNI_DEVICE,
.fops = &kni_fops,
};

这里主要看.fops里面的结构体

 static struct file_operations kni_fops = {
.owner = THIS_MODULE,
.open = kni_open,
.release = kni_release,
.unlocked_ioctl = (void *)kni_ioctl,
.compat_ioctl = (void *)kni_compat_ioctl,
};

这里涉及的主要操作有kni_open,kni_release,以及kni_ioctl,分别对应几个函数

 static int
kni_open(struct inode *inode, struct file *file)
{
struct net *net = current->nsproxy->net_ns;
struct kni_net *knet = net_generic(net, kni_net_id); /* kni device can be opened by one user only per netns */
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY; /* Create kernel thread for single mode */
if (multiple_kthread_on == ) {
KNI_PRINT("Single kernel thread for all KNI devices\n");
/* Create kernel thread for RX */
knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
"kni_single");
if (IS_ERR(knet->kni_kthread)) {
KNI_ERR("Unable to create kernel threaed\n");
return PTR_ERR(knet->kni_kthread);
}
} else
KNI_PRINT("Multiple kernel thread mode enabled\n"); file->private_data = get_net(net);
KNI_PRINT("/dev/kni opened\n"); return ;
}

kni_open时如果是单线程模式则会创建一个内核线程,并打开/dev/kni,这个时候在host的/dev下能看到kni设备文件

 static int
kni_ioctl(struct inode *inode,
unsigned int ioctl_num,
unsigned long ioctl_param)
{
int ret = -EINVAL;
struct net *net = current->nsproxy->net_ns; KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); /*
* Switch according to the ioctl called
*/
switch (_IOC_NR(ioctl_num)) {
case _IOC_NR(RTE_KNI_IOCTL_TEST):
/* For test only, not used */
break;
case _IOC_NR(RTE_KNI_IOCTL_CREATE):
ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
break;
case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
break;
default:
KNI_DBG("IOCTL default\n");
break;
} return ret;
}

kni_ioctl函数是与用户态通信的一个接口,主要的是kni_ioctl_create函数:

 static int
kni_ioctl_create(struct net *net,
unsigned int ioctl_num, unsigned long ioctl_param)
{
struct kni_net *knet = net_generic(net, kni_net_id);
int ret;
struct rte_kni_device_info dev_info;
struct pci_dev *pci = NULL;
struct pci_dev *found_pci = NULL;
struct net_device *net_dev = NULL;
struct net_device *lad_dev = NULL;
struct kni_dev *kni, *dev, *n; printk(KERN_INFO "KNI: Creating kni...\n");
/* Check the buffer size, to avoid warning */
if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
return -EINVAL; /* Copy kni info from user space */
ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
if (ret) {
KNI_ERR("copy_from_user in kni_ioctl_create");
return -EIO;
} /**
* Check if the cpu core id is valid for binding,
* for multiple kernel thread mode.
*/
if (multiple_kthread_on && dev_info.force_bind &&
!cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
} /* Check if it has been created */
down_read(&knet->kni_list_lock);
list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
if (kni_check_param(dev, &dev_info) < ) {
up_read(&knet->kni_list_lock);
return -EINVAL;
}
}
up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_UNKNOWN
NET_NAME_UNKNOWN,
#endif
kni_net_init);
if (net_dev == NULL) {
KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
return -EBUSY;
} dev_net_set(net_dev, net); kni = netdev_priv(net_dev); kni->net_dev = net_dev;
kni->group_id = dev_info.group_id;
kni->core_id = dev_info.core_id;
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); /* Translate user space info into kernel space info */
kni->tx_q = phys_to_virt(dev_info.tx_phys);
kni->rx_q = phys_to_virt(dev_info.rx_phys);
kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
kni->free_q = phys_to_virt(dev_info.free_phys); kni->req_q = phys_to_virt(dev_info.req_phys);
kni->resp_q = phys_to_virt(dev_info.resp_phys);
kni->sync_va = dev_info.sync_va;
kni->sync_kva = phys_to_virt(dev_info.sync_phys); kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
kni->mbuf_va = dev_info.mbuf_va; #ifdef RTE_KNI_VHOST
kni->vhost_queue = NULL;
kni->vq_status = BE_STOP;
#endif
kni->mbuf_size = dev_info.mbuf_size; KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
(unsigned long long) dev_info.tx_phys, kni->tx_q);
KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
(unsigned long long) dev_info.rx_phys, kni->rx_q);
KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n",
(unsigned long long) dev_info.free_phys, kni->free_q);
KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n",
(unsigned long long) dev_info.req_phys, kni->req_q);
KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
(unsigned long long) dev_info.resp_phys, kni->resp_q);
KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n",
(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va);
KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
dev_info.bus,
dev_info.devid,
dev_info.function,
dev_info.vendor_id,
dev_info.device_id); pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */
while (pci) {
KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
pci->bus->number,
PCI_SLOT(pci->devfn),
PCI_FUNC(pci->devfn)); if ((pci->bus->number == dev_info.bus) &&
(PCI_SLOT(pci->devfn) == dev_info.devid) &&
(PCI_FUNC(pci->devfn) == dev_info.function)) {
found_pci = pci;
switch (dev_info.device_id) {
#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
#include <rte_pci_dev_ids.h>
ret = igb_kni_probe(found_pci, &lad_dev);
break;
#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
case (dev):
#include <rte_pci_dev_ids.h>
ret = ixgbe_kni_probe(found_pci, &lad_dev);
break;
default:
ret = -;
break;
} KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
pci, lad_dev);
if (ret == ) {
kni->lad_dev = lad_dev;
kni_set_ethtool_ops(kni->net_dev);
} else {
KNI_ERR("Device not supported by ethtool");
kni->lad_dev = NULL;
} kni->pci_dev = found_pci;
kni->device_id = dev_info.device_id;
break;
}
pci = pci_get_device(dev_info.vendor_id,
dev_info.device_id, pci);
}
if (pci)
pci_dev_put(pci); if (kni->lad_dev)
memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
else
/*
* Generate random mac address. eth_random_addr() is the newer
* version of generating mac address in linux kernel.
*/
//random_ether_addr(net_dev->dev_addr);
memcpy(net_dev->dev_addr, &dev_info.kni_mac,ETH_ALEN); ret = register_netdev(net_dev);
if (ret) {
KNI_ERR("error %i registering device \"%s\"\n",
ret, dev_info.name);
kni_dev_remove(kni);
return -ENODEV;
} #ifdef RTE_KNI_VHOST
kni_vhost_init(kni);
#endif /**
* Create a new kernel thread for multiple mode, set its core affinity,
* and finally wake it up.
*/
if (multiple_kthread_on) {
kni->pthread = kthread_create(kni_thread_multiple,
(void *)kni,
"kni_%s", kni->name);
if (IS_ERR(kni->pthread)) {
kni_dev_remove(kni);
return -ECANCELED;
}
if (dev_info.force_bind)
kthread_bind(kni->pthread, kni->core_id);
wake_up_process(kni->pthread);
} down_write(&knet->kni_list_lock);
list_add(&kni->list, &knet->kni_list_head);
up_write(&knet->kni_list_lock); return ;
}

ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));这条语句会拷贝从用户态传过来的消息,dev_info主要存放了虚拟kni网口的相关参数,接下来就会根据dev_info中的参数注册一个kni网口ret = register_netdev(net_dev);

这个函数完成创建,这样就虚拟出一个网口出来。其中165行是自己修改的,因为我发现按照文档提供的方法根本不能ping通报文,我将生成kni的mac地址修改成dpdk接管的网口mac即可ping通。原生态代码是随机生成一个mac。

2,用户态代码主要分析dpdk提供的example,

 int
main(int argc, char** argv)
{
int ret;
uint8_t nb_sys_ports, port;
unsigned i; /* Associate signal_hanlder function with USR signals */
signal(SIGUSR1, signal_handler);
signal(SIGUSR2, signal_handler);
signal(SIGRTMIN, signal_handler);
signal(SIGINT, signal_handler); /* Initialise EAL */
ret = rte_eal_init(argc, argv);
if (ret < )
rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret);
argc -= ret;
argv += ret; /* Parse application arguments (after the EAL ones) */
ret = parse_args(argc, argv);
if (ret < )
rte_exit(EXIT_FAILURE, "Could not parse input parameters\n"); /* Create the mbuf pool */
pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
MEMPOOL_CACHE_SZ, , MBUF_DATA_SZ, rte_socket_id());
if (pktmbuf_pool == NULL) {
rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
return -;
} /* Get number of ports found in scan */
nb_sys_ports = rte_eth_dev_count();
if (nb_sys_ports == )
rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n"); /* Check if the configured port ID is valid */
for (i = ; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i] && i >= nb_sys_ports)
rte_exit(EXIT_FAILURE, "Configured invalid "
"port ID %u\n", i); /* Initialize KNI subsystem */
init_kni(); /* Initialise each port */
for (port = ; port < nb_sys_ports; port++) {
/* Skip ports that are not enabled */
if (!(ports_mask & ( << port)))
continue;
init_port(port); if (port >= RTE_MAX_ETHPORTS)
rte_exit(EXIT_FAILURE, "Can not use more than "
"%d ports for kni\n", RTE_MAX_ETHPORTS); kni_alloc(port);
}
check_all_ports_link_status(nb_sys_ports, ports_mask); /* Launch per-lcore function on every lcore */
rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
RTE_LCORE_FOREACH_SLAVE(i) {
if (rte_eal_wait_lcore(i) < )
return -;
} /* Release resources */
for (port = ; port < nb_sys_ports; port++) {
if (!(ports_mask & ( << port)))
continue;
kni_free_kni(port);
}
#ifdef RTE_LIBRTE_XEN_DOM0
rte_kni_close();
#endif
for (i = ; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i]) {
rte_free(kni_port_params_array[i]);
kni_port_params_array[i] = NULL;
} return ;
}

main函数进来进行一些eal的初始化,随后创建一个pktmbuf_pool,重点看一下init_kni();以及kni_alloc(port);rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);函数。其中init_kni()函数是初始化kni子系统

 static void
init_kni(void)
{
unsigned int num_of_kni_ports = , i;
struct kni_port_params **params = kni_port_params_array; /* Calculate the maximum number of KNI interfaces that will be used */
for (i = ; i < RTE_MAX_ETHPORTS; i++) {
if (kni_port_params_array[i]) {
num_of_kni_ports += (params[i]->nb_lcore_k ?
params[i]->nb_lcore_k : );
}
} /* Invoke rte KNI init to preallocate the ports */
rte_kni_init(num_of_kni_ports);
}

主要代码在rte_kni_init里面

 void
rte_kni_init(unsigned int max_kni_ifaces)
{
uint32_t i;
struct rte_kni_memzone_slot *it;
const struct rte_memzone *mz;
#define OBJNAMSIZ 32
char obj_name[OBJNAMSIZ];
char mz_name[RTE_MEMZONE_NAMESIZE]; /* Immediately return if KNI is already initialized */
if (kni_memzone_pool.initialized) {
RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
return;
} if (max_kni_ifaces == ) {
RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
max_kni_ifaces);
rte_panic("Unable to initialize KNI\n");
} /* Check FD and open */
if (kni_fd < ) {
kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
if (kni_fd < )
rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
} /* Allocate slot objects */
kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
rte_malloc(NULL,
sizeof(struct rte_kni_memzone_slot) *
max_kni_ifaces,
);
KNI_MEM_CHECK(kni_memzone_pool.slots == NULL); /* Initialize general pool variables */
kni_memzone_pool.initialized = ;
kni_memzone_pool.max_ifaces = max_kni_ifaces;
kni_memzone_pool.free = &kni_memzone_pool.slots[];
rte_spinlock_init(&kni_memzone_pool.mutex); /* Pre-allocate all memzones of all the slots; panic on error */
for (i = ; i < max_kni_ifaces; i++) { /* Recover current slot */
it = &kni_memzone_pool.slots[i];
it->id = i; /* Allocate KNI context */
snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_ctx = mz; /* TX RING */
snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_tx_q = mz; /* RX RING */
snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_rx_q = mz; /* ALLOC RING */
snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_alloc_q = mz; /* FREE RING */
snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_free_q = mz; /* Request RING */
snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_req_q = mz; /* Response RING */
snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_resp_q = mz; /* Req/Resp sync mem area */
snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
SOCKET_ID_ANY, );
KNI_MEM_CHECK(mz == NULL);
it->m_sync_addr = mz; if ((i+) == max_kni_ifaces) {
it->next = NULL;
kni_memzone_pool.free_tail = it;
} else
it->next = &kni_memzone_pool.slots[i+];
} return; kni_fail:
rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
max_kni_ifaces);
}

对上图中所有的fifo分配内存。

 static int
kni_alloc(uint8_t port_id)
{
uint8_t i;
struct rte_kni *kni;
struct rte_kni_conf conf;
struct kni_port_params **params = kni_port_params_array; if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
return -; params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
params[port_id]->nb_lcore_k : ; for (i = ; i < params[port_id]->nb_kni; i++) {
/* Clear conf at first */
memset(&conf, , sizeof(conf));
if (params[port_id]->nb_lcore_k) {
snprintf(conf.name, RTE_KNI_NAMESIZE,
"vEth%u_%u", port_id, i);
conf.core_id = params[port_id]->lcore_k[i];
conf.force_bind = ;
} else
snprintf(conf.name, RTE_KNI_NAMESIZE,
"vEth%u", port_id);
conf.group_id = (uint16_t)port_id;
conf.mbuf_size = MAX_PACKET_SZ;
rte_eth_macaddr_get(port_id, (struct ether_addr *)&conf.kni_mac);
/*
* The first KNI device associated to a port
* is the master, for multiple kernel thread
* environment.
*/
if (i == ) {
struct rte_kni_ops ops;
struct rte_eth_dev_info dev_info; memset(&dev_info, , sizeof(dev_info));
rte_eth_dev_info_get(port_id, &dev_info);
conf.addr = dev_info.pci_dev->addr;
conf.id = dev_info.pci_dev->id; memset(&ops, , sizeof(ops));
ops.port_id = port_id;
ops.change_mtu = kni_change_mtu;
ops.config_network_if = kni_config_network_interface; kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
} else
kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL); if (!kni)
rte_exit(EXIT_FAILURE, "Fail to create kni for "
"port: %d\n", port_id);
params[port_id]->kni[i] = kni;
} return ;
}
 struct rte_kni *
rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
const struct rte_kni_conf *conf,
struct rte_kni_ops *ops)
{
int ret;
struct rte_kni_device_info dev_info;
struct rte_kni *ctx;
char intf_name[RTE_KNI_NAMESIZE];
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
const struct rte_mempool *mp;
struct rte_kni_memzone_slot *slot = NULL; if (!pktmbuf_pool || !conf || !conf->name[])
return NULL; /* Check if KNI subsystem has been initialized */
if (kni_memzone_pool.initialized != ) {
RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
return NULL;
} /* Get an available slot from the pool */
slot = kni_memzone_pool_alloc();
if (!slot) {
RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.\n",
kni_memzone_pool.max_ifaces);
return NULL;
} /* Recover ctx */
ctx = slot->m_ctx->addr;
snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name); if (ctx->in_use) {
RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
return NULL;
}
memset(ctx, , sizeof(struct rte_kni));
if (ops)
memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops)); memset(&dev_info, , sizeof(dev_info));
dev_info.bus = conf->addr.bus;
dev_info.devid = conf->addr.devid;
dev_info.function = conf->addr.function;
dev_info.vendor_id = conf->id.vendor_id;
dev_info.device_id = conf->id.device_id;
dev_info.core_id = conf->core_id;
dev_info.force_bind = conf->force_bind;
dev_info.group_id = conf->group_id;
dev_info.mbuf_size = conf->mbuf_size; snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name); RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
dev_info.bus, dev_info.devid, dev_info.function,
dev_info.vendor_id, dev_info.device_id);
/* TX RING */
mz = slot->m_tx_q;
ctx->tx_q = mz->addr;
kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
dev_info.tx_phys = mz->phys_addr; /* RX RING */
mz = slot->m_rx_q;
ctx->rx_q = mz->addr;
kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
dev_info.rx_phys = mz->phys_addr; /* ALLOC RING */
mz = slot->m_alloc_q;
ctx->alloc_q = mz->addr;
kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
dev_info.alloc_phys = mz->phys_addr; /* FREE RING */
mz = slot->m_free_q;
ctx->free_q = mz->addr;
kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
dev_info.free_phys = mz->phys_addr; /* Request RING */
mz = slot->m_req_q;
ctx->req_q = mz->addr;
kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
dev_info.req_phys = mz->phys_addr; /* Response RING */
mz = slot->m_resp_q;
ctx->resp_q = mz->addr;
kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
dev_info.resp_phys = mz->phys_addr; /* Req/Resp sync mem area */
mz = slot->m_sync_addr;
ctx->sync_addr = mz->addr;
dev_info.sync_va = mz->addr;
dev_info.sync_phys = mz->phys_addr; /* MBUF mempool */
snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
pktmbuf_pool->name);
mz = rte_memzone_lookup(mz_name);
KNI_MEM_CHECK(mz == NULL);
mp = (struct rte_mempool *)mz->addr;
/* KNI currently requires to have only one memory chunk */
if (mp->nb_mem_chunks != )
goto kni_fail; dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
ctx->pktmbuf_pool = pktmbuf_pool;
ctx->group_id = conf->group_id;
ctx->slot_id = slot->id;
ctx->mbuf_size = conf->mbuf_size; dev_info.kni_mac = conf->kni_mac; ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
KNI_MEM_CHECK(ret < ); ctx->in_use = ; /* Allocate mbufs and then put them into alloc_q */
kni_allocate_mbufs(ctx); return ctx; kni_fail:
if (slot)
kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]); return NULL;
}

其中ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);就是将dev_info传入内核。

 static int
main_loop(__rte_unused void *arg)
{
uint8_t i, nb_ports = rte_eth_dev_count();
int32_t f_stop;
const unsigned lcore_id = rte_lcore_id();
enum lcore_rxtx {
LCORE_NONE,
LCORE_RX,
LCORE_TX,
LCORE_MAX
};
enum lcore_rxtx flag = LCORE_NONE; for (i = ; i < nb_ports; i++) {
if (!kni_port_params_array[i])
continue;
if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
flag = LCORE_RX;
break;
} else if (kni_port_params_array[i]->lcore_tx ==
(uint8_t)lcore_id) {
flag = LCORE_TX;
break;
}
} if (flag == LCORE_RX) {
RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n",
kni_port_params_array[i]->lcore_rx,
kni_port_params_array[i]->port_id);
while () {
f_stop = rte_atomic32_read(&kni_stop);
if (f_stop)
break;
kni_ingress(kni_port_params_array[i]);
}
} else if (flag == LCORE_TX) {
RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
kni_port_params_array[i]->lcore_tx,
kni_port_params_array[i]->port_id);
while () {
f_stop = rte_atomic32_read(&kni_stop);
if (f_stop)
break;
kni_egress(kni_port_params_array[i]);
}
} else
RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id); return ;
}

进入循环收发包,

 static void
kni_ingress(struct kni_port_params *p)
{
uint8_t i, port_id;
unsigned nb_rx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; if (p == NULL)
return; nb_kni = p->nb_kni;
port_id = p->port_id;
for (i = ; i < nb_kni; i++) {
/* Burst rx from eth */
nb_rx = rte_eth_rx_burst(port_id, , pkts_burst, PKT_BURST_SZ);
if (unlikely(nb_rx > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from eth\n");
return;
}
/* Burst tx to kni */
num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
kni_stats[port_id].rx_packets += num;
//if(kni_stats[port_id].rx_packets != 0 && kni_stats[port_id].rx_packets%20 == 0 && num > 0)
// printf("recv packet num : %"PRIu64"\n",kni_stats[port_id].rx_packets);
rte_kni_handle_request(p->kni[i]);
if (unlikely(num < nb_rx)) {
/* Free mbufs not tx to kni interface */
kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
kni_stats[port_id].rx_dropped += nb_rx - num;
}
}
}
 static void
kni_egress(struct kni_port_params *p)
{
uint8_t i, port_id;
unsigned nb_tx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ]; if (p == NULL)
return; nb_kni = p->nb_kni;
port_id = p->port_id;
for (i = ; i < nb_kni; i++) {
/* Burst rx from kni */
num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
if (unlikely(num > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from KNI\n");
return;
}
/* Burst tx to eth */
nb_tx = rte_eth_tx_burst(port_id, , pkts_burst, (uint16_t)num);
kni_stats[port_id].tx_packets += nb_tx;
if (unlikely(nb_tx < num)) {
/* Free mbufs not tx to NIC */
kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
kni_stats[port_id].tx_dropped += num - nb_tx;
}
}
}

代码就守护在这个kni网口进行收发包。篇幅有限,后面再整理。

dpdk中kni模块的更多相关文章

  1. dpdk中log的使用方法

    1 log简介    dpdk中通过log系统记录相关的日志信息,每一条日志除日志内容外,还有两个附加信息,log级别和log类型.开发人员可根据级别和类型对日志信息进行过滤,只记录必要的日志.1.1 ...

  2. 隐藏进程中的模块绕过IceSword的检测

    标 题: [原创] 隐藏进程中的模块绕过IceSword的检测 作 者: xPLK 时 间: 2008-06-19,17:59:11 链 接: http://bbs.pediy.com/showthr ...

  3. 浅析JS中的模块规范(CommonJS,AMD,CMD)////////////////////////zzzzzz

    浅析JS中的模块规范(CommonJS,AMD,CMD)   如果你听过js模块化这个东西,那么你就应该听过或CommonJS或AMD甚至是CMD这些规范咯,我也听过,但之前也真的是听听而已.     ...

  4. 解决centos7中python-pip模块不存在的问题

    centos 7中python-pip模块不存在,是因为像centos这类衍生的发行版,源跟新滞后,或者不存在.即使使用yum去search python-pip也找不到软件包. 为了使用安装滞后或源 ...

  5. Nodejs中cluster模块的多进程共享数据问题

    Nodejs中cluster模块的多进程共享数据问题 前述 nodejs在v0.6.x之后增加了一个模块cluster用于实现多进程,利用child_process模块来创建和管理进程,增加程序在多核 ...

  6. Python中optionParser模块的使用方法[转]

    本文以实例形式较为详尽的讲述了Python中optionParser模块的使用方法,对于深入学习Python有很好的借鉴价值.分享给大家供大家参考之用.具体分析如下: 一般来说,Python中有两个内 ...

  7. python中threading模块详解(一)

    python中threading模块详解(一) 来源 http://blog.chinaunix.net/uid-27571599-id-3484048.html threading提供了一个比thr ...

  8. Python中的模块与包

    标准库的安装路径 在import模块的时候,python是通过系统路径找到这些模块的,我们可以将这些路径打印出来: >>> pprint.pprint(sys.path) ['', ...

  9. 【OpenCV】OpenCV中GPU模块使用

    CUDA基本使用方法 在介绍OpenCV中GPU模块使用之前,先回顾下CUDA的一般使用方法,其基本步骤如下: 1.主机代码执行:2.传输数据到GPU:3.确定grid,block大小: 4.调用内核 ...

随机推荐

  1. bzoj 4806 炮

    Written with StackEdit. Description 众所周知,双炮叠叠将是中国象棋中很厉害的一招必杀技.炮吃子时必须隔一个棋子跳吃,即俗称"炮打隔子". 炮跟炮 ...

  2. ArcGIS Runtime SDK for WPF之测量距离和面积

    bu不多说,上代码 using System.Windows; using ESRI.ArcGIS.Client; using ESRI.ArcGIS.Client.Tasks; using ESRI ...

  3. 【sqlite】基础知识

    最近做一个数控系统的项目,winCE嵌入式操作系统+.Net Compact Framework环境+VS2008开发平台,开发的设备程序部署到winCE系统下的设备中运行.. 个年头,SQLite也 ...

  4. 学习动态性能表(3)--v$sql&v$sql_plan

    学习动态性能表 第三篇-(1)-v$sq 2007.5.25 V$SQL中存储具体的SQL语句. 一条语句可以映射多个cursor,因为对象所指的cursor可以有不同用户(如例1).如果有多个cur ...

  5. ubuntu下网卡eth1如何修改为eth0

    正常来说,Linux在识别网卡时第一张会是eth0,第二张才是eth1.有时候我们使用虚拟机克隆技术后网卡的信息就会改变,新克隆出来的虚拟主机网卡名字可能变为eth1.无论我们怎么修改都无法改变,这就 ...

  6. jquery 实现点击颜色切换

    <!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content ...

  7. DSP SYS/BIOS开发

    https://blog.csdn.net/lg1259156776/article/details/80695318

  8. LevelDB Cache实现机制分析

    几天前淘宝量子恒道在博客上分析了HBase的Cache机制,本篇文章,结合LevelDB 1.7.0版本的源码,分析下LevelDB的Cache机制. 概述 LevelDB是Google开源的持久化K ...

  9. 第14 章 Spring MVC的工作机制与设计模式

    14.1 Spring MVC的总体设计 要使用SPring MVC,只要在web.xml中配置一个DispatcherServlet. 再定义一个dispatcherServlet-servlet. ...

  10. MySQL 性能测试经验

    一.背景 近期在进行资源调度管理平台的重构工作,其中的Resource/Property数据库设计,在没有更加优化的方案前,打算沿用当前平台的数据库结构:这就需要对当前平台的数据库结构进行剖析研究,并 ...