DPDK l3fwd
l3fwd负责三层转发,比l2fwd要复杂点。
- /*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdint.h>
- #include <inttypes.h>
- #include <sys/types.h>
- #include <string.h>
- #include <sys/queue.h>
- #include <stdarg.h>
- #include <errno.h>
- #include <getopt.h>
- #include <rte_common.h>
- #include <rte_vect.h>
- #include <rte_byteorder.h>
- #include <rte_log.h>
- #include <rte_memory.h>
- #include <rte_memcpy.h>
- #include <rte_memzone.h>
- #include <rte_eal.h>
- #include <rte_per_lcore.h>
- #include <rte_launch.h>
- #include <rte_atomic.h>
- #include <rte_cycles.h>
- #include <rte_prefetch.h>
- #include <rte_lcore.h>
- #include <rte_per_lcore.h>
- #include <rte_branch_prediction.h>
- #include <rte_interrupts.h>
- #include <rte_pci.h>
- #include <rte_random.h>
- #include <rte_debug.h>
- #include <rte_ether.h>
- #include <rte_ethdev.h>
- #include <rte_ring.h>
- #include <rte_mempool.h>
- #include <rte_mbuf.h>
- #include <rte_ip.h>
- #include <rte_tcp.h>
- #include <rte_udp.h>
- #include <rte_string_fns.h>
- #define APP_LOOKUP_EXACT_MATCH 0
- #define APP_LOOKUP_LPM 1
- #define DO_RFC_1812_CHECKS
- #ifndef APP_LOOKUP_METHOD //默认使用LPM来路由
- #define APP_LOOKUP_METHOD APP_LOOKUP_LPM
- #endif
- /*
- * 0表示未优化 When set to zero, simple forwaring path is eanbled.
- * 1表示优化 When set to one, optimized forwarding path is enabled.
- * LPM会用到SSE4.1特性 Note that LPM optimisation path uses SSE4.1 instructions.
- * 注意: 发现深圳测试机的CPU支持的是SSE 4.2特性,不知道会不会有影响呢???
- */
- #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
- #define ENABLE_MULTI_BUFFER_OPTIMIZE 0
- #else
- #define ENABLE_MULTI_BUFFER_OPTIMIZE 1
- #endif
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
- #include <rte_hash.h>
- #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- #include <rte_lpm.h>
- #include <rte_lpm6.h>
- #else
- #error "APP_LOOKUP_METHOD set to incorrect value"
- #endif
#ifndef IPv6_BYTES
/* printf-style format for a 16-byte IPv6 address: eight groups of two bytes. */
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
                       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
/* Expands to the 16 bytes of addr, in order, as arguments for IPv6_BYTES_FMT. */
#define IPv6_BYTES(addr) \
	(addr)[0],  (addr)[1],  (addr)[2],  (addr)[3], \
	(addr)[4],  (addr)[5],  (addr)[6],  (addr)[7], \
	(addr)[8],  (addr)[9],  (addr)[10], (addr)[11],\
	(addr)[12], (addr)[13], (addr)[14], (addr)[15]
#endif
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

#define MAX_JUMBO_PKT_LEN  9600

#define IPV6_ADDR_LEN 16

#define MEMPOOL_CACHE_SIZE 256

#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

/*
 * This expression is used to calculate the number of mbufs needed depending on
 * user input, taking into account memory for rx and tx hardware rings, cache
 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
 * NB_MBUF never goes below a minimum value of 8192.
 * The expansion reads nb_ports, nb_rx_queue, n_tx_queue and nb_lcores, which
 * must be in scope wherever NB_MBUF is used.
 */
#define NB_MBUF RTE_MAX	(							\
		(nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT +		\
		nb_ports*nb_lcores*MAX_PKT_BURST +				\
		nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT +			\
		nb_lcores*MEMPOOL_CACHE_SIZE),					\
		(unsigned)8192)

#define MAX_PKT_BURST     32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/*
 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
 */
#define	MAX_TX_BURST	(MAX_PKT_BURST / 2)

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET	3

/* Used to mark destination port as 'invalid'. */
#define	BAD_PORT	((uint16_t)-1)

#define FWDSTEP	4
- /*
- * Configurable number of RX/TX ring descriptors
- */
- #define RTE_TEST_RX_DESC_DEFAULT 128
- #define RTE_TEST_TX_DESC_DEFAULT 512
- static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
- static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
- /* ethernet addresses of ports */
- static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
- static __m128i val_eth[RTE_MAX_ETHPORTS];
- /* replace first 12B of the ethernet header. */
- #define MASK_ETH 0x3f
/* mask of enabled ports: bit N set means port N may be used as an output */
static uint32_t enabled_port_mask = 0;
static int promiscuous_on = 0; /**< Ports set in promiscuous mode off by default. */
static int numa_on = 1; /**< NUMA is enabled by default. */

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static int ipv6 = 0; /**< ipv6 is false by default. */
#endif
- struct mbuf_table {
- uint16_t len; //实际个数???
- struct rte_mbuf *m_table[MAX_PKT_BURST];
- };
/* Identifies one RX queue: a (port, queue) pair. */
struct lcore_rx_queue {
	uint8_t port_id;	/* physical port number */
	uint8_t queue_id;	/* NIC queue number on that port */
} __rte_cache_aligned;
#define MAX_RX_QUEUE_PER_LCORE 16	/* at most 16 RX queues per lcore */
/* TX queues per port; the original note says 32, i.e. assumes RTE_MAX_ETHPORTS == 32 - TODO confirm */
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128	/* at most 128 RX queues per port */

#define MAX_LCORE_PARAMS 1024
/* One (port, queue, lcore) assignment: which lcore polls which RX queue. */
struct lcore_params {
	uint8_t port_id;	/* physical port number */
	uint8_t queue_id;	/* NIC queue number */
	uint8_t lcore_id;	/* lcore number */
} __rte_cache_aligned;
- static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];//最大1024
- //此处可以修改lcore的默认配置
- static struct lcore_params lcore_params_array_default[] = {
- {, , },//物理端口的编号,网卡队列的编号,lcore的编号
- {, , },
- {, , },
- {, , },
- {, , },
- {, , },
- {, , },
- {, , },
- {, , },
- };
- static struct lcore_params * lcore_params = lcore_params_array_default;
- static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
- sizeof(lcore_params_array_default[]);//默认值为9
- static struct rte_eth_conf port_conf = {
- .rxmode = {
- .mq_mode = ETH_MQ_RX_RSS, //看起来l3fwd支持RSS哟
- .max_rx_pkt_len = ETHER_MAX_LEN,
- .split_hdr_size = ,
- .header_split = , /**< Header Split disabled */
- .hw_ip_checksum = , /**< IP checksum offload enabled */
- .hw_vlan_filter = , /**< VLAN filtering disabled */
- .jumbo_frame = , /**< Jumbo Frame Support disabled */
- .hw_strip_crc = , /**< CRC stripped by hardware */
- },
- .rx_adv_conf = {
- .rss_conf = {
- .rss_key = NULL,
- .rss_hf = ETH_RSS_IP,
- },
- },
- .txmode = {
- .mq_mode = ETH_MQ_TX_NONE,
- },
- };
- static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
- #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
- #include <rte_hash_crc.h>
- #define DEFAULT_HASH_FUNC rte_hash_crc
- #else
- #include <rte_jhash.h>
- #define DEFAULT_HASH_FUNC rte_jhash
- #endif
/* IPv4 5-tuple used as the key of an exact-match route (packed, 13 bytes). */
struct ipv4_5tuple {
	uint32_t ip_dst;	/* destination IP address */
	uint32_t ip_src;	/* source IP address */
	uint16_t port_dst;	/* destination port */
	uint16_t port_src;	/* source port */
	uint8_t  proto;		/* transport-layer protocol */
} __attribute__((__packed__));
- union ipv4_5tuple_host {
- struct {
- uint8_t pad0;
- uint8_t proto;
- uint16_t pad1;
- uint32_t ip_src;
- uint32_t ip_dst;
- uint16_t port_src;
- uint16_t port_dst;
- };
- __m128i xmm;
- };
- #define XMM_NUM_IN_IPV6_5TUPLE 3
- struct ipv6_5tuple {
- uint8_t ip_dst[IPV6_ADDR_LEN];
- uint8_t ip_src[IPV6_ADDR_LEN];
- uint16_t port_dst;
- uint16_t port_src;
- uint8_t proto;
- } __attribute__((__packed__));
- union ipv6_5tuple_host {
- struct {
- uint16_t pad0;
- uint8_t proto;
- uint8_t pad1;
- uint8_t ip_src[IPV6_ADDR_LEN];
- uint8_t ip_dst[IPV6_ADDR_LEN];
- uint16_t port_src;
- uint16_t port_dst;
- uint64_t reserve;
- };
- __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
- };
- struct ipv4_l3fwd_route {
- struct ipv4_5tuple key;
- uint8_t if_out;
- };
- struct ipv6_l3fwd_route {
- struct ipv6_5tuple key; u
- int8_t if_out;
- };
- //这里设置默认的静态的三层转发路由规则,实际使用的时候需要修改这个地方
- static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- };
- static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
- {{ {0xfe, 0x80, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0x80, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{ {0xfe, 0x90, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0x90, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{ {0xfe, 0xa0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0xa0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{ {0xfe, 0xb0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0xb0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- };
- typedef struct rte_hash lookup_struct_t;
- static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
- static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
- #ifdef RTE_ARCH_X86_64
- /* default to 4 million hash entries (approx) */
- #define L3FWD_HASH_ENTRIES 1024*1024*4
- #else
- /* 32-bit has less address-space for hugepage memory, limit to 1M entries */
- #define L3FWD_HASH_ENTRIES 1024*1024*1
- #endif
- #define HASH_ENTRY_NUMBER_DEFAULT 4
- static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
- static inline uint32_tipv4_hash_crc(const void *data,
- __rte_unused uint32_t data_len, uint32_t init_val){
- const union ipv4_5tuple_host *k;
- uint32_t t; const uint32_t *p;
- k = data;
- t = k->proto;
- p = (const uint32_t *)&k->port_src;
- #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
- init_val = rte_hash_crc_4byte(t, init_val);
- init_val = rte_hash_crc_4byte(k->ip_src, init_val);
- init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
- init_val = rte_hash_crc_4byte(*p, init_val);
- #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- init_val = rte_jhash_1word(t, init_val);
- init_val = rte_jhash_1word(k->ip_src, init_val);
- init_val = rte_jhash_1word(k->ip_dst, init_val);
- init_val = rte_jhash_1word(*p, init_val);
- #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- return (init_val);
- }
- static inline uint32_tipv6_hash_crc(const void *data,
- __rte_unused uint32_t data_len, uint32_t init_val){
- const union ipv6_5tuple_host *k;
- uint32_t t;
- const uint32_t *p;
- #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
- const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
- const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
- #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- k = data;
- t = k->proto;
- p = (const uint32_t *)&k->port_src;
- #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
- ip_src0 = (const uint32_t *) k->ip_src;
- ip_src1 = (const uint32_t *)(k->ip_src+);
- ip_src2 = (const uint32_t *)(k->ip_src+);
- ip_src3 = (const uint32_t *)(k->ip_src+);
- ip_dst0 = (const uint32_t *) k->ip_dst;
- ip_dst1 = (const uint32_t *)(k->ip_dst+);
- ip_dst2 = (const uint32_t *)(k->ip_dst+);
- ip_dst3 = (const uint32_t *)(k->ip_dst+);
- init_val = rte_hash_crc_4byte(t, init_val);
- init_val = rte_hash_crc_4byte(*ip_src0, init_val);
- init_val = rte_hash_crc_4byte(*ip_src1, init_val);
- init_val = rte_hash_crc_4byte(*ip_src2, init_val);
- init_val = rte_hash_crc_4byte(*ip_src3, init_val);
- init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
- init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
- init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
- init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
- init_val = rte_hash_crc_4byte(*p, init_val);
- #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- init_val = rte_jhash_1word(t, init_val);
- init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
- init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
- init_val = rte_jhash_1word(*p, init_val);
- #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- return (init_val);
- }
- #define IPV4_L3FWD_NUM_ROUTES \
- (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[]))
- #define IPV6_L3FWD_NUM_ROUTES \
- (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[]))
- static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
- static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
- #endif
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- struct ipv4_l3fwd_route {
- uint32_t ip; //看起来l3fwd支持RSS哟
- uint8_t depth; //深度
- uint8_t if_out; //数据转发的出口
- };
- struct ipv6_l3fwd_route {
- uint8_t ip[];
- uint8_t depth;
- uint8_t if_out;
- };
- //这里设置默认的静态的三层转发路由规则,实际使用的时候需要修改这个地方
- static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { //只有8个元素???
- {IPv4(,,,), , }, //{IPv4(192,168,10,0), 24, 0},
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- {IPv4(,,,), , },
- };
- static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- {{,,,,,,,,,,,,,,,}, , },
- };
- static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
- };
- static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
- {{
- {0xfe, 0x80, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0x80, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{
- {0xfe, 0x90, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0x90, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{
- {0xfe, 0xa0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0xa0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- {{
- {0xfe, 0xb0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
- {0xfe, 0xb0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
- , , IPPROTO_TCP}, },
- };
- #define IPV4_L3FWD_NUM_ROUTES \
- (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[]))
- #define IPV6_L3FWD_NUM_ROUTES \
- (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[]))
- #define IPV4_L3FWD_LPM_MAX_RULES 1024
- #define IPV6_L3FWD_LPM_MAX_RULES 1024
- #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
- typedef struct rte_lpm lookup_struct_t;
- typedef struct rte_lpm6 lookup6_struct_t;
- static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];//8个元素
- static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
- #endif
- struct lcore_conf {//保存lcore的配置信息
- uint16_t n_rx_queue; //接收队列的总数量
- struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];//物理端口和网卡队列编号组成的数组
- uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; //发送队列的编号组成的数组
- struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];//mbuf表
- lookup_struct_t * ipv4_lookup_struct; //实际上就是struct rte_lpm *
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- lookup6_struct_t * ipv6_lookup_struct;
- #else
- lookup_struct_t * ipv6_lookup_struct;
- #endif
- } __rte_cache_aligned;
- static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
- /* Send burst of packets on an output interface */
- static inline int //在输出接口port上把数据包burst发送出去
- send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
- {
- struct rte_mbuf **m_table;
- int ret;
- uint16_t queueid;
- queueid = qconf->tx_queue_id[port];
- m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
- ret = rte_eth_tx_burst(port, queueid, m_table, n);
- if (unlikely(ret < n)) {
- do {
- rte_pktmbuf_free(m_table[ret]);
- } while (++ret < n);
- }
- return ;
- }
- /* Enqueue a single packet, and send burst if queue is filled */
- static inline int //发送一个mbuf
- send_single_packet(struct rte_mbuf *m, uint8_t port)
- {
- uint32_t lcore_id;
- uint16_t len;
- struct lcore_conf *qconf;
- lcore_id = rte_lcore_id();
- qconf = &lcore_conf[lcore_id];
- len = qconf->tx_mbufs[port].len;
- qconf->tx_mbufs[port].m_table[len] = m;
- len++;
- /* enough pkts to be sent */
- if (unlikely(len == MAX_PKT_BURST)) { //如果累计到32个数据包
- send_burst(qconf, MAX_PKT_BURST, port); //把32个数据包发送出去
- len = ;
- }
- qconf->tx_mbufs[port].len = len;
- return ;
- }
- static inline __attribute__ void
- send_packetsx4(struct lcore_conf *qconf, uint8_t port,
- struct rte_mbuf *m[], uint32_t num)
- {
- uint32_t len, j, n;
- len = qconf->tx_mbufs[port].len;
- /* 如果某个队列的发送缓冲区为空,而且已有足够数量数据包待发送,那么立即发送
- * If TX buffer for that queue is empty, and we have enough packets,
- * then send them straightway.
- */
- if (num >= MAX_TX_BURST && len == ) {
- n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);//burst发送num个mbufs
- if (unlikely(n < num)) { //如果实际发送数据包的个数小于num
- do {
- rte_pktmbuf_free(m[n]); //把剩下的num-n个mbufs返回mempool
- } while (++n < num);
- }
- return;
- }
- /*
- * Put packets into TX buffer for that queue.
- */
- //把那些数据包放到网卡队列的发送缓冲区中
- n = len + num;
- n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
- j = ;
- switch (n % FWDSTEP) {
- while (j < n) {
- case :
- qconf->tx_mbufs[port].m_table[len + j] = m[j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[len + j] = m[j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[len + j] = m[j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[len + j] = m[j];
- j++;
- }
- }
- len += n;
- /*待发送的包数量达到32个 enough pkts to be sent */
- if (unlikely(len == MAX_PKT_BURST)) {
- send_burst(qconf, MAX_PKT_BURST, port);
- /* copy rest of the packets into the TX buffer. */
- len = num - n;
- j = ;
- switch (len % FWDSTEP) {
- while (j < len) {
- case :
- qconf->tx_mbufs[port].m_table[j] = m[n + j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[j] = m[n + j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[j] = m[n + j];
- j++;
- case :
- qconf->tx_mbufs[port].m_table[j] = m[n + j];
- j++;
- }
- }
- }
- qconf->tx_mbufs[port].len = len;
- }
#ifdef DO_RFC_1812_CHECKS
/*
 * Validate an IPv4 header against the RFC 1812 section 5.2.2 rules.
 *
 * @param pkt       IPv4 header to check.
 * @param link_len  packet length reported by the link layer.
 * @return          0 when the header passes, a negative value naming the
 *                  failed rule (-1, -3, -4, -5) otherwise. Callers in this
 *                  file only test for < 0.
 */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;
	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	if ((pkt->version_ihl & 0xf) < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.
	 * total_length is in network byte order, so convert it to CPU order
	 * before comparing (same byte swap as the cpu_to_be form, stated in
	 * the direction actually meant).
	 */
	if (rte_be_to_cpu_16(pkt->total_length) < sizeof(struct ipv4_hdr))
		return -5;

	return 0;
}
#endif
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
- static __m128i mask0;
- static __m128i mask1;
- static __m128i mask2;
- static inline uint8_t //哈希情形下获取转发出口
- get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
- {
- int ret = ;
- union ipv4_5tuple_host key;
- ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
- __m128i data = _mm_loadu_si128((__m128i*)(ipv4_hdr));
- /* Get 5 tuple: dst port, src port, dst IP address, src IP address and protocol */
- key.xmm = _mm_and_si128(data, mask0);
- /* Find destination port */
- ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
- return (uint8_t)((ret < )? portid : ipv4_l3fwd_out_if[ret]);
- }
- static inline uint8_t
- get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup_struct_t * ipv6_l3fwd_lookup_struct)
- {
- int ret = ;
- union ipv6_5tuple_host key;
- ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
- __m128i data0 = _mm_loadu_si128((__m128i*)(ipv6_hdr));
- __m128i data1 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)));
- __m128i data2 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)+sizeof(__m128i)));
- /* Get part of 5 tuple: src IP address lower 96 bits and protocol */
- key.xmm[] = _mm_and_si128(data0, mask1);
- /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address higher 32 bits */
- key.xmm[] = data1;
- /* Get part of 5 tuple: dst port and src port and dst IP address higher 32 bits */
- key.xmm[] = _mm_and_si128(data2, mask2);
- /* Find destination port */
- ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
- return (uint8_t)((ret < )? portid : ipv6_l3fwd_out_if[ret]);
- }
- #endif
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- static inline uint8_t //LPM情形下获取ipv4数据包的目的端口
- get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
- {
- uint8_t next_hop;
- return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
- rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
- &next_hop) == ) ? next_hop : portid);
- }
- static inline uint8_t
- get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup6_struct_t * ipv6_l3fwd_lookup_struct)
- {
- uint8_t next_hop;
- return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
- ((struct ipv6_hdr*)ipv6_hdr)->dst_addr, &next_hop) == )?
- next_hop : portid);
- }
- #endif
/* Forward declaration; marked unused because not every build calls it. */
static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
	struct lcore_conf *qconf)  __attribute__((unused));
- #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
- (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
- static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i mask1,
- union ipv6_5tuple_host * key)
- {
- __m128i tmpdata0 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
- + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)));
- __m128i tmpdata1 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
- + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
- + sizeof(__m128i)));
- __m128i tmpdata2 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
- + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
- + sizeof(__m128i) + sizeof(__m128i)));
- key->xmm[] = _mm_and_si128(tmpdata0, mask0);
- key->xmm[] = tmpdata1;
- key->xmm[] = _mm_and_si128(tmpdata2, mask1);
- return;
- }
- static inline void
- simple_ipv4_fwd_4pkts(struct rte_mbuf* m[], uint8_t portid, struct lcore_conf *qconf)
- {
- struct ether_hdr *eth_hdr[];
- struct ipv4_hdr *ipv4_hdr[];
- void *d_addr_bytes[];
- uint8_t dst_port[];
- int32_t ret[];
- union ipv4_5tuple_host key[];
- __m128i data[];
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- /* Handle IPv4 headers.*/
- ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- #ifdef DO_RFC_1812_CHECKS
- /* Check to make sure the packet is valid (RFC1812) */
- uint8_t valid_mask = MASK_ALL_PKTS;
- if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
- rte_pktmbuf_free(m[]);
- valid_mask &= EXECLUDE_1ST_PKT;
- }
- if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
- rte_pktmbuf_free(m[]);
- valid_mask &= EXECLUDE_2ND_PKT;
- }
- if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
- rte_pktmbuf_free(m[]);
- valid_mask &= EXECLUDE_3RD_PKT;
- }
- if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
- rte_pktmbuf_free(m[]);
- valid_mask &= EXECLUDE_4TH_PKT;
- }
- if (unlikely(valid_mask != MASK_ALL_PKTS)) {
- if (valid_mask == ){
- return;
- } else {
- uint8_t i = ;
- for (i = ; i < ; i++) {
- if ((0x1 << i) & valid_mask) {
- l3fwd_simple_forward(m[i], portid, qconf);
- }
- }
- return;
- }
- }
- #endif // End of #ifdef DO_RFC_1812_CHECKS
- data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- key[].xmm = _mm_and_si128(data[], mask0);
- key[].xmm = _mm_and_si128(data[], mask0);
- key[].xmm = _mm_and_si128(data[], mask0);
- key[].xmm = _mm_and_si128(data[], mask0);
- const void *key_array[] = {&key[], &key[], &key[],&key[]};
- rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[], , ret);
- dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- /* 02:00:00:00:00:xx */
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- #ifdef DO_RFC_1812_CHECKS
- /* Update time to live and header checksum */
- --(ipv4_hdr[]->time_to_live);
- --(ipv4_hdr[]->time_to_live);
- --(ipv4_hdr[]->time_to_live);
- --(ipv4_hdr[]->time_to_live);
- ++(ipv4_hdr[]->hdr_checksum);
- ++(ipv4_hdr[]->hdr_checksum);
- ++(ipv4_hdr[]->hdr_checksum);
- ++(ipv4_hdr[]->hdr_checksum);
- #endif
- /* src addr */
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- }
- #define MASK_ALL_PKTS 0xf
- #define EXECLUDE_1ST_PKT 0xe
- #define EXECLUDE_2ND_PKT 0xd
- #define EXECLUDE_3RD_PKT 0xb
- #define EXECLUDE_4TH_PKT 0x7
- static inline void
- simple_ipv6_fwd_4pkts(struct rte_mbuf* m[], uint8_t portid, struct lcore_conf *qconf)
- {
- struct ether_hdr *eth_hdr[];
- __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[];
- void *d_addr_bytes[];
- uint8_t dst_port[];
- int32_t ret[];
- union ipv6_5tuple_host key[];
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
- /* Handle IPv6 headers.*/
- ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
- sizeof(struct ether_hdr));
- get_ipv6_5tuple(m[], mask1, mask2, &key[]);
- get_ipv6_5tuple(m[], mask1, mask2, &key[]);
- get_ipv6_5tuple(m[], mask1, mask2, &key[]);
- get_ipv6_5tuple(m[], mask1, mask2, &key[]);
- const void *key_array[] = {&key[], &key[], &key[],&key[]};
- rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[], , ret);
- dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
- dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
- dst_port[] = portid;
- /* 02:00:00:00:00:xx */
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- d_addr_bytes[] = ð_hdr[]->d_addr.addr_bytes[];
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
- /* src addr */
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- ether_addr_copy(&ports_eth_addr[dst_port[]], ð_hdr[]->s_addr);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- send_single_packet(m[], (uint8_t)dst_port[]);
- }
- #endif /* APP_LOOKUP_METHOD */
- static inline __attribute__ void //简单三层转发,没有使用SSE4.1优化
- l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf)
- {
- struct ether_hdr *eth_hdr;
- struct ipv4_hdr *ipv4_hdr;
- void *d_addr_bytes;
- uint8_t dst_port;
- eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); //得到eth_hdr指针
- if (m->ol_flags & PKT_RX_IPV4_HDR) { //如果是ipv4包
- /* Handle IPv4 headers.*/
- ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
- sizeof(struct ether_hdr));
- #ifdef DO_RFC_1812_CHECKS
- /* Check to make sure the packet is valid (RFC1812) */
- if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < ) {
- rte_pktmbuf_free(m);
- return;
- }
- #endif
- //想要满足文生提出的需求,主要在这里修改ip层和tcp层的数据内容。
- dst_port = get_ipv4_dst_port(ipv4_hdr, portid, //获取转发出口
- qconf->ipv4_lookup_struct);
- if (dst_port >= RTE_MAX_ETHPORTS ||
- (enabled_port_mask & << dst_port) == )
- dst_port = portid; //出错则直接把入口作为转发出口
- /* 02:00:00:00:00:xx 这里是修改目的mac地址吗??? */
- d_addr_bytes = ð_hdr->d_addr.addr_bytes[];
- *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR +
- ((uint64_t)dst_port << );
- #ifdef DO_RFC_1812_CHECKS
- /* Update time to live and header checksum */
- --(ipv4_hdr->time_to_live);
- ++(ipv4_hdr->hdr_checksum);
- #endif
- /* //把进入包的目的mac地址作为转发包的源地址 src addr */
- ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr);
- send_single_packet(m, dst_port); //经过dst_port把转发包发送出去
- } else { //如果是ipv6包
- /* Handle IPv6 headers.*/
- struct ipv6_hdr *ipv6_hdr;
- ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
- sizeof(struct ether_hdr));
- dst_port = get_ipv6_dst_port(ipv6_hdr, portid, qconf->ipv6_lookup_struct);
- if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port) == )
- dst_port = portid;
- /* 02:00:00:00:00:xx */
- d_addr_bytes = ð_hdr->d_addr.addr_bytes[];
- *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR +
- ((uint64_t)dst_port << );
- /* src addr */
- ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr);
- send_single_packet(m, dst_port);
- }
- }
#ifdef DO_RFC_1812_CHECKS

#define	IPV4_MIN_VER_IHL	0x45
#define	IPV4_MAX_VER_IHL	0x4f
#define	IPV4_MAX_VER_IHL_DIFF	(IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)

/* Minimum value of IPV4 total length (20B) in network byte order. */
#define	IPV4_MIN_LEN_BE	(sizeof(struct ipv4_hdr) << 8)

/*
 * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
 * - The IP version number must be 4.
 * - The IP header length field must be large enough to hold the
 *    minimum length legal IP datagram (20 bytes = 5 words).
 * - The IP total length field must be large enough to hold the IP
 *   datagram header, whose length is specified in the IP header length
 *   field.
 * If we encounter invalid IPV4 packet, then set destination port for it
 * to BAD_PORT value.
 *
 * For packets flagged PKT_RX_IPV4_HDR the TTL is decremented and the header
 * checksum incremented unconditionally, before the validity test.
 * Packets without that flag are left untouched.
 */
static inline __attribute__((always_inline)) void
rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t flags)
{
	uint8_t ihl;

	if ((flags & PKT_RX_IPV4_HDR) != 0) {

		/* Wraps to a large value when version_ihl is below 0x45. */
		ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;

		ipv4_hdr->time_to_live--;
		ipv4_hdr->hdr_checksum++;

		if (ihl > IPV4_MAX_VER_IHL_DIFF ||
				((uint8_t)ipv4_hdr->total_length == 0 &&
				ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) {
			dp[0] = BAD_PORT;
		}
	}
}

#else
/* Same arity as the real function so that call sites compile either way. */
#define	rfc1812_process(mb, dp, flags)	do { } while (0)
#endif /* DO_RFC_1812_CHECKS */
- #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
- (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
- static inline __attribute__ uint16_t //得到目的ip地址对应的转发出口
- get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
- uint32_t dst_ipv4, uint8_t portid)
- {
- uint8_t next_hop;
- struct ipv6_hdr *ipv6_hdr;
- struct ether_hdr *eth_hdr;
- if (pkt->ol_flags & PKT_RX_IPV4_HDR) { //如果都是ipv4
- if (rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4,
- &next_hop) != ) //返回0则查找到,next_hop中已经得到下一跳
- next_hop = portid; //此时没找到,则直接把portid设定为下一跳
- } else if (pkt->ol_flags & PKT_RX_IPV6_HDR) { //如果都是ipv6
- eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
- ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + );
- if (rte_lpm6_lookup(qconf->ipv6_lookup_struct,
- ipv6_hdr->dst_addr, &next_hop) != )
- next_hop = portid;
- } else { //如果有其他种类的数据包
- next_hop = portid;//设定下一跳
- }
- return next_hop;//返回下一跳
- }
- static inline void //处理一个数据包
- process_packet(struct lcore_conf *qconf, struct rte_mbuf *pkt,
- uint16_t *dst_port, uint8_t portid)
- {
- struct ether_hdr *eth_hdr;
- struct ipv4_hdr *ipv4_hdr;
- uint32_t dst_ipv4;
- uint16_t dp;
- __m128i te, ve;
- eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);//获取eth首部
- ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );//获取ipv4首部
- dst_ipv4 = ipv4_hdr->dst_addr; //得到大端的ipv4目的地址
- dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);//转换成小端
- dp = get_dst_port(qconf, pkt, dst_ipv4, portid); //获取转发出口/下一跳
- te = _mm_load_si128((__m128i *)eth_hdr);
- ve = val_eth[dp];
- dst_port[] = dp;
- rfc1812_process(ipv4_hdr, dst_port, pkt->ol_flags);
- te = _mm_blend_epi16(te, ve, MASK_ETH);
- _mm_store_si128((__m128i *)eth_hdr, te);
- }
- /* 从4个mbufs中读取目的IP地址和ol_flags
- * Read ol_flags and destination IPV4 addresses from 4 mbufs.
- */
- static inline void
- processx4_step1(struct rte_mbuf *pkt[FWDSTEP], __m128i *dip, uint32_t *flag)
- {
- struct ipv4_hdr *ipv4_hdr;
- struct ether_hdr *eth_hdr;
- uint32_t x0, x1, x2, x3;
- //第一个mbuf
- eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);//得到eth_hdr
- ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );//得到ipv4_hdr
- x0 = ipv4_hdr->dst_addr;//得到dst_addr
- flag[] = pkt[]->ol_flags & PKT_RX_IPV4_HDR;
- //第二个mbuf
- eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
- ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
- x1 = ipv4_hdr->dst_addr;
- flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
- //第三个mbuf
- eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
- ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
- x2 = ipv4_hdr->dst_addr;
- flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
- //第四个mbuf
- eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
- ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
- x3 = ipv4_hdr->dst_addr;
- flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
- dip[] = _mm_set_epi32(x3, x2, x1, x0);//把4个dst_addr合并为128位的寄存器
- }
- /*
- * Lookup into LPM for destination port.
- * If lookup fails, use incoming port (portid) as destination port.
- */ //在LPM中查找转发出口/下一跳,如果没有找到则把入口作为转发出口
- static inline void
- processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag,
- uint8_t portid, struct rte_mbuf *pkt[FWDSTEP], uint16_t dprt[FWDSTEP])
- {
- rte_xmm_t dst;
- const __m128i bswap_mask = _mm_set_epi8(, , , , , , , ,
- , , , , , , , ); //表示重新排列的顺序
- /* Byte swap 4 IPV4 addresses. 按照字节交换ipv4地址 */
- dip = _mm_shuffle_epi8(dip, bswap_mask);
- /* 如果4个分组都是ipv4的 if all 4 packets are IPV4. */
- if (likely(flag != )) {
- rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dprt, portid);
- } else {
- dst.x = dip; //获取4个目的ip地址
- dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);//得到下一跳/转发出口
- dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
- dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
- dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
- }
- }
- /*
- * Update source and destination MAC addresses in the ethernet header.
- * Perform RFC1812 checks and updates for IPV4 packets.
- */ //更新目的mac和源mac地址
- static inline void
- processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
- {
- __m128i te[FWDSTEP];
- __m128i ve[FWDSTEP];
- __m128i *p[FWDSTEP];
- p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));//指向第一个数据包的内容
- p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
- p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
- p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
- ve[] = val_eth[dst_port[]];
- te[] = _mm_load_si128(p[]);//将p[0]指向的内容加载到128位寄存器中
- ve[] = val_eth[dst_port[]];
- te[] = _mm_load_si128(p[]);
- ve[] = val_eth[dst_port[]];
- te[] = _mm_load_si128(p[]);
- ve[] = val_eth[dst_port[]];
- te[] = _mm_load_si128(p[]);
- /*替换更新前12个字节,保留剩余 Update first 12 bytes, keep rest bytes intact. */
- te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
- te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
- te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
- te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
- _mm_store_si128(p[], te[]);
- _mm_store_si128(p[], te[]);
- _mm_store_si128(p[], te[]);
- _mm_store_si128(p[], te[]);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
- &dst_port[], pkt[]->ol_flags);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
- &dst_port[], pkt[]->ol_flags);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
- &dst_port[], pkt[]->ol_flags);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
- &dst_port[], pkt[]->ol_flags);
- }
/*
 * We group consecutive packets with the same destination port into one burst.
 * To avoid extra latency this is done together with some other packet
 * processing, but after we made a final decision about packet's destination.
 * To do this we maintain:
 * pnum - array of number of consecutive packets with the same dest port for
 * each packet in the input burst.
 * lp - pointer to the last updated element in the pnum.
 * dlp - dest port value lp corresponds to.
 */

#define	GRPSZ	(1 << FWDSTEP)	/* number of possible comparison masks */
#define	GRPMSK	(GRPSZ - 1)	/* mask value meaning "all ports equal" */

/*
 * Scalar grouping step for packet number idx:
 * - same port as the current run (dlp): extend the run counted at lp[0];
 * - different port: start a new run of length 1 at pn[idx].
 * dlp and lp are updated in place, so they must be lvalues.
 */
#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)	do { \
	if (likely((dlp) == (dcp)[(idx)])) {         \
		(lp)[0]++;                           \
	} else {                                     \
		(dlp) = (dcp)[(idx)];                \
		(lp) = (pn) + (idx);                 \
		(lp)[0] = 1;                         \
	}                                            \
} while (0)
- /*
- * Group consecutive packets with the same destination port in bursts of 4.
- * Suppose we have array of destionation ports:
- * dst_port[] = {a, b, c, d,, e, ... }
- * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
- * We doing 4 comparisions at once and the result is 4 bit mask.
- * This mask is used as an index into prebuild array of pnum values.
- */
- static inline uint16_t * //把出口相同的4个数据包构成一组
- port_groupx4(uint16_t pn[FWDSTEP + ], uint16_t *lp, __m128i dp1, __m128i dp2)
- {
- static const struct {
- uint64_t pnum; /*为pnum预设的4个值 prebuild 4 values for pnum[]. */
- int32_t idx; /*最后一次更新的元素的索引 index for new last updated elemnet. */
- uint16_t lpv; /*把值加到最后一次更新的元素 add value to the last updated element. */
- } gptbl[GRPSZ] = {
- {
- /* 0: a != b, b != c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100010001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 1: a == b, b != c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100010002),
- .idx = ,
- .lpv = ,
- },
- {
- /* 2: a != b, b == c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100020001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 3: a == b, b == c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100020003),
- .idx = ,
- .lpv = ,
- },
- {
- /* 4: a != b, b != c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200010001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 5: a == b, b != c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200010002),
- .idx = ,
- .lpv = ,
- },
- {
- /* 6: a != b, b == c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200030001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 7: a == b, b == c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200030004),
- .idx = ,
- .lpv = ,
- },
- {
- /* 8: a != b, b != c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100010001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 9: a == b, b != c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100010002),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xa: a != b, b == c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100020001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xb: a == b, b == c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100020003),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xc: a != b, b != c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300010001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xd: a == b, b != c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300010002),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xe: a != b, b == c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300040001),
- .idx = ,
- .lpv = ,
- },
- {
- /* 0xf: a == b, b == c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300040005),
- .idx = ,
- .lpv = ,
- },
- };
- union {
- uint16_t u16[FWDSTEP + ];
- uint64_t u64;
- } *pnum = (void *)pn;
- int32_t v;
- dp1 = _mm_cmpeq_epi16(dp1, dp2); //按照16位一个单元来比较dp1和dp2
- dp1 = _mm_unpacklo_epi16(dp1, dp1); //按照16位一个单元将dp1与dp1来结合
- v = _mm_movemask_ps((__m128)dp1); //根据dp1的4个值形成4个位的掩码
- /*更新最后一次端口计数 update last port counter. */
- lp[] += gptbl[v].lpv;
- /*如果转发出口的值已经改变 if dest port value has changed. */
- if (v != GRPMSK) {
- lp = pnum->u16 + gptbl[v].idx;
- lp[] = ;
- pnum->u64 = gptbl[v].pnum;
- }
- return lp;
- }
- #endif /* APP_LOOKUP_METHOD */
- /* 线程执行函数 main processing loop */
- static int
- main_loop(__attribute__((unused)) void *dummy)
- {
- struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; //32个指针构成的数组
- unsigned lcore_id;
- uint64_t prev_tsc, diff_tsc, cur_tsc;
- int i, j, nb_rx;
- uint8_t portid, queueid;
- struct lcore_conf *qconf;
- const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - ) /
- US_PER_S * BURST_TX_DRAIN_US;
- #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
- (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
- int32_t k;
- uint16_t dlp; //dlp为lp对应的转发出口编号
- uint16_t *lp; //lp指向pkts_burst中最后一次更新的元素
- uint16_t dst_port[MAX_PKT_BURST]; //dst_port是32个数据包的转发出口构成的数组
- __m128i dip[MAX_PKT_BURST / FWDSTEP]; //数据包的目的IP地址构成的数组
- uint32_t flag[MAX_PKT_BURST / FWDSTEP];
- uint16_t pnum[MAX_PKT_BURST + ]; //转发出口相同的数据包的编号
- #endif
- prev_tsc = ;
- lcore_id = rte_lcore_id(); //获取lcore_id
- qconf = &lcore_conf[lcore_id];//获取lcore_id的配置信息
- if (qconf->n_rx_queue == ) { //如果lcore上没有接收队列
- RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
- return ;
- }
- RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
- for (i = ; i < qconf->n_rx_queue; i++) { //遍历所有的接收队列
- portid = qconf->rx_queue_list[i].port_id; //得到物理端口的编号
- queueid = qconf->rx_queue_list[i].queue_id; //得到网卡队列的编号
- RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id,
- portid, queueid);
- }
- while () { //死循环,体现PMD思想
- cur_tsc = rte_rdtsc();
- /*
- * TX burst queue drain
- */
- diff_tsc = cur_tsc - prev_tsc; //计算时间差
- if (unlikely(diff_tsc > drain_tsc)) { //如果两次时间差大于定值
- /*
- * This could be optimized (use queueid instead of
- * portid), but it is not called so often
- */
- for (portid = ; portid < RTE_MAX_ETHPORTS; portid++) {//遍历所有的物理端口
- if (qconf->tx_mbufs[portid].len == )
- continue;
- send_burst(qconf,
- qconf->tx_mbufs[portid].len,
- portid);
- qconf->tx_mbufs[portid].len = ;
- }
- prev_tsc = cur_tsc; //记下前一时间
- }
- /* 从接收队列中读取数据包
- * Read packet from RX queues
- */
- for (i = ; i < qconf->n_rx_queue; ++i) { //遍历所有的接收队列
- portid = qconf->rx_queue_list[i].port_id;//得到物理端口的编号
- queueid = qconf->rx_queue_list[i].queue_id; //得到网卡队列的编号
- nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
- MAX_PKT_BURST); //在每个队列上尽量接收32个数据包,用nb_rx记录实际个数
- if (nb_rx == ) //如果一个包也没有收到
- continue;
- #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1) //如果支持Intel SSE4.1特性
- if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) //如果使用lpm
- k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); //整除4
- for (j = ; j != k; j += FWDSTEP) { //每次处理4个mbufs
- processx4_step1(&pkts_burst[j], //从4个mbufs中读取目的ip地址和ol_flags
- &dip[j / FWDSTEP],
- &flag[j / FWDSTEP]);
- }
- k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
- for (j = ; j != k; j += FWDSTEP) {//每次处理4个mbufs
- processx4_step2(qconf, dip[j / FWDSTEP], //在LPM中查找转发出口,如果失败则把进入的端口作为转发出口
- flag[j / FWDSTEP], portid,
- &pkts_burst[j], &dst_port[j]);
- }
- /* 完成包处理,并根据相同的转发出口来分组连续的数据包
- * Finish packet processing and group consecutive
- * packets with the same destination port.
- */
- k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);//处理成4的幂
- if (k != ) {
- __m128i dp1, dp2;
- lp = pnum;
- lp[] = ;
- processx4_step3(pkts_burst, dst_port); //更新目的mac和源mac地址
- /* dp1: <d[0], d[1], d[2], d[3], ... > */
- dp1 = _mm_loadu_si128((__m128i *)dst_port); //把目的端口加载到寄存器dp1中
- for (j = FWDSTEP; j != k; j += FWDSTEP) { //每次处理4个mbufs
- processx4_step3(&pkts_burst[j], //更新目的mac和源mac地址
- &dst_port[j]);
- /*
- * dp2:
- * <d[j-3], d[j-2], d[j-1], d[j], ... >
- */
- dp2 = _mm_loadu_si128((__m128i *) //返回一个__m128i的寄存器
- &dst_port[j - FWDSTEP + ]);
- lp = port_groupx4(&pnum[j - FWDSTEP], //把出口相同的4个数据包构成一组
- lp, dp1, dp2);
- /*
- * dp1:
- * <d[j], d[j+1], d[j+2], d[j+3], ... >
- */
- dp1 = _mm_srli_si128(dp2, //逻辑左移3*16位,返回一个__m128i的寄存器
- (FWDSTEP - ) *
- sizeof(dst_port[]));
- }
- /*
- * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
- */
- dp2 = _mm_shufflelo_epi16(dp1, 0xf9); //重新排序,返回一个__m128i的寄存器
- lp = port_groupx4(&pnum[j - FWDSTEP], lp, //把4个连续分组按照目的端口分组
- dp1, dp2);
- /*
- * remove values added by the last repeated
- * dst port.
- */
- lp[]--;
- dlp = dst_port[j - ];
- } else {
- /* set dlp and lp to the never used values. */
- dlp = BAD_PORT - ;
- lp = pnum + MAX_PKT_BURST;
- }
- /*处理最后的三个分组 Process up to last 3 packets one by one. */
- switch (nb_rx % FWDSTEP) {
- case : //第三个mbuf
- process_packet(qconf, pkts_burst[j],
- dst_port + j, portid);
- GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
- j++;
- case ://第二个mbuf
- process_packet(qconf, pkts_burst[j],
- dst_port + j, portid);
- GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
- j++;
- case ://第一个mbuf
- process_packet(qconf, pkts_burst[j],
- dst_port + j, portid);
- GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
- j++;
- }
- /*通过目的端口把数据包都发出去,这些数据包之前已经组合好了的
- * Send packets out, through destination port.
- * Consecuteve pacekts with the same destination port
- * are already grouped together.
- * If destination port for the packet equals BAD_PORT,
- * then free the packet without sending it out.
- */
- for (j = ; j < nb_rx; j += k) { //遍历接收到的数据包
- int32_t m;
- uint16_t pn;
- pn = dst_port[j];
- k = pnum[j];
- if (likely(pn != BAD_PORT)) {
- send_packetsx4(qconf, pn, //把待发送的数据包放到发送缓冲区中,累积到32个再发出去
- pkts_burst + j, k);
- } else {
- for (m = j; m != j + k; m++)
- rte_pktmbuf_free(pkts_burst[m]);
- }
- }
- #endif /* APP_LOOKUP_METHOD */
- #else /*如果不支持Intel SSE4.1特性 ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */
- /*预取接收队列上的第一个数据包 Prefetch first packets */
- for (j = ; j < PREFETCH_OFFSET && j < nb_rx; j++) {
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
- }
- /*预取和转发已经预取的数据包 Prefetch and forward already prefetched packets */
- for (j = ; j < (nb_rx - PREFETCH_OFFSET); j++) {
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
- j + PREFETCH_OFFSET], void *));
- l3fwd_simple_forward(pkts_burst[j], portid, qconf);//简单转发4倍数的数据包
- }
- /*转发正在预取的数据包 Forward remaining prefetched packets */
- for (; j < nb_rx; j++) {
- l3fwd_simple_forward(pkts_burst[j], portid, qconf);//简单转发剩余几个数据包
- }
- #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
- } //for (i = 0; i < qconf->n_rx_queue; ++i)
- } //while (1)
- }//end of main_loop
- static int //检查lcore的参数
- check_lcore_params(void)
- {
- uint8_t queue, lcore;
- uint16_t i;
- int socketid;
- for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
- queue = lcore_params[i].queue_id;
- if (queue >= MAX_RX_QUEUE_PER_PORT) { //如果队列编号大于128
- printf("invalid queue number: %hhu\n", queue);
- return -;
- }
- lcore = lcore_params[i].lcore_id;
- if (!rte_lcore_is_enabled(lcore)) { //如果lcore没有启用
- printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
- return -;
- }
- if ((socketid = rte_lcore_to_socket_id(lcore) != ) &&
- (numa_on == )) { //如果numa关闭
- printf("warning: lcore %hhu is on socket %d with numa off \n",
- lcore, socketid);
- }
- }
- return ;
- }
- static int //检查物理端口的配置
- check_port_config(const unsigned nb_ports)
- {
- unsigned portid;
- uint16_t i;
- for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
- portid = lcore_params[i].port_id;
- if ((enabled_port_mask & ( << portid)) == ) {
- printf("port %u is not enabled in port mask\n", portid);
- return -;
- }
- if (portid >= nb_ports) {
- printf("port %u is not present on the board\n", portid);
- return -;
- }
- }
- return ;
- }
- static uint8_t //获取物理端口上的接收队列数量
- get_port_n_rx_queues(const uint8_t port) //其实就是取queue_id最大值加1
- {
- int queue = -;
- uint16_t i;
- for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
- if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
- queue = lcore_params[i].queue_id;//获取queue_id值
- }
- return (uint8_t)(++queue); //因为queue_id从0开始
- }
- static int //初始化lcore上的接收队列
- init_lcore_rx_queues(void)
- {
- uint16_t i, nb_rx_queue;
- uint8_t lcore;
- for (i = ; i < nb_lcore_params; ++i) {//遍历lcores的参数表
- lcore = lcore_params[i].lcore_id;
- nb_rx_queue = lcore_conf[lcore].n_rx_queue;
- if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {//如果接收队列总数大于128
- printf("error: too many queues (%u) for lcore: %u\n",
- (unsigned)nb_rx_queue + , (unsigned)lcore);
- return -;
- } else {
- lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
- lcore_params[i].port_id; //记录port_id
- lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
- lcore_params[i].queue_id; //记录queue_id
- lcore_conf[lcore].n_rx_queue++;//lcore上接收队列的数量加1
- }
- }
- return ;
- }
/* Display usage: print the command-line synopsis for program prgname. */
static void
print_usage(const char *prgname)
{
	printf ("%s [EAL options] -- -p PORTMASK -P"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" -P : enable promiscuous mode\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --no-numa: optional, disable numa awareness\n"
		" --ipv6: optional, specify it if running ipv6 packets\n"
		" --enable-jumbo: enable jumbo frame"
		" which max packet len is PKTLEN in decimal (64-9600)\n"
		" --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n",
		prgname);
}
/*
 * Parse a maximum packet length given as a decimal string.
 *
 * Returns the length as a positive int, or -1 when the string is empty,
 * contains trailing characters, is zero, or does not fit in an int
 * (previously such values were silently truncated).
 */
static int
parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (len == 0 || len > INT32_MAX)
		return -1;

	return (int)len;
}
/*
 * Parse the port mask given as a hexadecimal string.
 *
 * Returns the mask, or 0 when the string is empty, malformed or evaluates
 * to zero. Zero is used as the failure value because the caller assigns
 * the result to the (unsigned) port mask and rejects it with "== 0"; a
 * negative error code would read back as "all ports enabled".
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
/*
 * Parse the number of hash entries given as a hexadecimal string.
 *
 * Returns the value as a positive int, or -1 when the string is empty,
 * contains trailing characters, is zero, or does not fit in an int.
 */
static int
parse_hash_entry_number(const char *hash_entry_num)
{
	char *end = NULL;
	unsigned long hash_en;

	/* parse hexadecimal string */
	hash_en = strtoul(hash_entry_num, &end, 16);
	if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (hash_en == 0 || hash_en > INT32_MAX)
		return -1;

	return (int)hash_en;
}
#endif
- static int //分析参数中的配置
- parse_config(const char *q_arg)
- {
- char s[];
- const char *p, *p0 = q_arg;
- char *end;
- enum fieldnames {
- FLD_PORT = ,
- FLD_QUEUE,
- FLD_LCORE,
- _NUM_FLD
- };
- unsigned long int_fld[_NUM_FLD];
- char *str_fld[_NUM_FLD];
- int i;
- unsigned size;
- nb_lcore_params = ; //数组的元素个数初始化为0
- //举例: --config="(0,0,1),(0,1,2),(1,0,1),(1,1,3)"
- while ((p = strchr(p0,'(')) != NULL) { //找到左括号的位置,并赋值给p,除非找不到左括号才结束while循环
- ++p;
- if((p0 = strchr(p,')')) == NULL) //找到有括号的位置,并赋值给p0
- return -;
- size = p0 - p; //计算括号内的字符串长度
- if(size >= sizeof(s))
- return -;
- snprintf(s, sizeof(s), "%.*s", size, p); //按照size宽度拼接字符串s
- if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)//分割字符串s到str_fld中
- return -;
- for (i = ; i < _NUM_FLD; i++){//遍历各个成员
- errno = ;
- int_fld[i] = strtoul(str_fld[i], &end, );//获取port_id、queue_id、lcore_id成员的值
- if (errno != || end == str_fld[i] || int_fld[i] > )
- return -;
- }
- if (nb_lcore_params >= MAX_LCORE_PARAMS) {
- printf("exceeded max number of lcore params: %hu\n",
- nb_lcore_params);
- return -;
- }
- lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];//赋值port_id
- lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];//赋值queue_id
- lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];//赋值lcore_id
- ++nb_lcore_params; //数组的元素个数自增
- }
- lcore_params = lcore_params_array;//使用新配置,抛弃默认配置
- return ;
- }
- #define CMD_LINE_OPT_CONFIG "config"
- #define CMD_LINE_OPT_NO_NUMA "no-numa"
- #define CMD_LINE_OPT_IPV6 "ipv6"
- #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
- #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
- /* Parse the argument given in the command line of the application */
- static int //分析l3fwd相关的参数
- parse_args(int argc, char **argv)
- {
- int opt, ret;
- char **argvopt;
- int option_index;
- char *prgname = argv[];
- static struct option lgopts[] = {
- {CMD_LINE_OPT_CONFIG, , , }, //config参数对应于case 0
- {CMD_LINE_OPT_NO_NUMA, , , },
- {CMD_LINE_OPT_IPV6, , , },
- {CMD_LINE_OPT_ENABLE_JUMBO, , , },
- {CMD_LINE_OPT_HASH_ENTRY_NUM, , , },
- {NULL, , , }//应该可以在这个地方加上kni_config命令字
- };
- argvopt = argv;
- while ((opt = getopt_long(argc, argvopt, "p:P",
- lgopts, &option_index)) != EOF) {
- switch (opt) {
- /* portmask 物理端口的掩码*/
- case 'p':
- enabled_port_mask = parse_portmask(optarg);//optarg为指向当前选项参数的指针
- if (enabled_port_mask == ) {
- printf("invalid portmask\n");
- print_usage(prgname);
- return -;
- }
- break;
- case 'P': //混杂模式
- printf("Promiscuous mode selected\n");
- promiscuous_on = ;
- break;
- /* long options 解析长选项 */
- case :
- if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_CONFIG,
- sizeof (CMD_LINE_OPT_CONFIG))) { //参数config
- ret = parse_config(optarg);//解析()中的参数
- if (ret) {
- printf("invalid config\n");
- print_usage(prgname);
- return -;
- }
- }
- if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
- sizeof(CMD_LINE_OPT_NO_NUMA))) { //参数no-numa
- printf("numa is disabled \n");
- numa_on = ;
- }
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
- if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
- sizeof(CMD_LINE_OPT_IPV6))) { //参数ipv6
- printf("ipv6 is specified \n");
- ipv6 = ;
- }
- #endif
- if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
- sizeof (CMD_LINE_OPT_ENABLE_JUMBO))) {//参数enable-jumbo
- struct option lenopts = {"max-pkt-len", required_argument, , };
- printf("jumbo frame is enabled - disabling simple TX path\n");
- port_conf.rxmode.jumbo_frame = ;
- /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
- if ( == getopt_long(argc, argvopt, "", &lenopts, &option_index)) {
- ret = parse_max_pkt_len(optarg); //分析数据包的长度
- if ((ret < ) || (ret > MAX_JUMBO_PKT_LEN)){
- printf("invalid packet length\n");
- print_usage(prgname);
- return -;
- }
- port_conf.rxmode.max_rx_pkt_len = ret;
- }
- printf("set jumbo frame max packet length to %u\n",
- (unsigned int)port_conf.rxmode.max_rx_pkt_len);
- }
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
- if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
- sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {//参数hash-entry-num
- ret = parse_hash_entry_number(optarg);
- if ((ret > ) && (ret <= L3FWD_HASH_ENTRIES)) {
- hash_entry_number = ret;
- } else {
- printf("invalid hash entry number\n");
- print_usage(prgname);
- return -;
- }
- }
- #endif
- break;
- default:
- print_usage(prgname);
- return -;
- }
- }
- if (optind >= )
- argv[optind-] = prgname;
- ret = optind-;
- optind = ; /* optind是下一个选项的索引 reset getopt lib */
- return ret;
- }
- static void //打印mac地址
- print_ethaddr(const char *name, const struct ether_addr *eth_addr)
- {
- char buf[ETHER_ADDR_FMT_SIZE];
- ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
- printf("%s%s", name, buf);
- }
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- static void //创建LPM
- setup_lpm(int socketid)
- {
- struct rte_lpm6_config config;
- unsigned i;
- int ret;
- char s[];
- /* 创建LPM ipv4表 create the LPM table */
- snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
- ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
- IPV4_L3FWD_LPM_MAX_RULES, );
- if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
- rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
- " on socket %d\n", socketid);
- /* 填充ipv4 LPM表 populate the LPM table */
- for (i = ; i < IPV4_L3FWD_NUM_ROUTES; i++) {//遍历已经配置的所有的规则
- /* skip unused ports 跳过未使用的物理端口*/
- if (( << ipv4_l3fwd_route_array[i].if_out &
- enabled_port_mask) == )
- continue;
- //添加一条路由,即把规则转换为tbl24或者tbl8
- ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
- ipv4_l3fwd_route_array[i].ip,
- ipv4_l3fwd_route_array[i].depth,
- ipv4_l3fwd_route_array[i].if_out);
- if (ret < ) { //如果添加路由失败
- rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
- "l3fwd LPM table on socket %d\n",
- i, socketid);
- }
- printf("LPM: Adding route 0x%08x / %d (%d)\n",
- (unsigned)ipv4_l3fwd_route_array[i].ip,
- ipv4_l3fwd_route_array[i].depth,
- ipv4_l3fwd_route_array[i].if_out);
- }
- /* 创建lpm ipv6表 create the LPM6 table */
- snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
- config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
- config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
- config.flags = ;
- ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
- &config);
- if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
- rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
- " on socket %d\n", socketid);
- /* 填充LPM ipv6表 populate the LPM table */
- for (i = ; i < IPV6_L3FWD_NUM_ROUTES; i++) {
- /* skip unused ports */
- if (( << ipv6_l3fwd_route_array[i].if_out &
- enabled_port_mask) == )
- continue;
- ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
- ipv6_l3fwd_route_array[i].ip,
- ipv6_l3fwd_route_array[i].depth,
- ipv6_l3fwd_route_array[i].if_out);
- if (ret < ) {
- rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
- "l3fwd LPM table on socket %d\n",
- i, socketid);
- }
- printf("LPM: Adding route %s / %d (%d)\n",
- "IPV6",
- ipv6_l3fwd_route_array[i].depth,
- ipv6_l3fwd_route_array[i].if_out);
- }
- }
- #endif
- static int //初始化内存
- init_mem(unsigned nb_mbuf)
- {
- struct lcore_conf *qconf;
- int socketid;
- unsigned lcore_id;
- char s[];
- for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) {//遍历所有lcores
- if (rte_lcore_is_enabled(lcore_id) == )
- continue;
- if (numa_on) //一般开启了numa
- socketid = rte_lcore_to_socket_id(lcore_id);//得到lcore所在的socketid
- else
- socketid = ; //默认socketid为0
- if (socketid >= NB_SOCKETS) {
- rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
- socketid, lcore_id, NB_SOCKETS);
- }
- if (pktmbuf_pool[socketid] == NULL) {
- snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
- pktmbuf_pool[socketid] = //为每一个socket创建mempool用来动态分配mbufs
- rte_mempool_create(s, nb_mbuf, MBUF_SIZE, MEMPOOL_CACHE_SIZE,
- sizeof(struct rte_pktmbuf_pool_private),
- rte_pktmbuf_pool_init, NULL,
- rte_pktmbuf_init, NULL,
- socketid, );
- if (pktmbuf_pool[socketid] == NULL)
- rte_exit(EXIT_FAILURE,
- "Cannot init mbuf pool on socket %d\n", socketid);
- else
- printf("Allocated mbuf pool on socket %d\n", socketid);
- #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
- setup_lpm(socketid); //创建LPM表,只需给每个socket cpu创建一个LPM表,而同一个CPU上的lcores共享LPM
- #else
- setup_hash(socketid); //创建Hash表
- #endif
- }
- qconf = &lcore_conf[lcore_id];
- qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
- qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
- }
- return ;
- }
- /* Check the link status of all ports in up to 9s, and print them finally */
- static void //检查物理端口的连接状态
- check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
- {
- #define CHECK_INTERVAL 100 /* 100ms */
- #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
- uint8_t portid, count, all_ports_up, print_flag = ;
- struct rte_eth_link link;
- printf("\nChecking link status");
- fflush(stdout);
- for (count = ; count <= MAX_CHECK_TIME; count++) {//最多执行9000次
- all_ports_up = ;
- for (portid = ; portid < port_num; portid++) {//遍历物理端口
- if ((port_mask & ( << portid)) == )
- continue;
- memset(&link, , sizeof(link));
- rte_eth_link_get_nowait(portid, &link);
- /* print link status if flag set */
- if (print_flag == ) {
- if (link.link_status)
- printf("Port %d Link Up - speed %u "
- "Mbps - %s\n", (uint8_t)portid,
- (unsigned)link.link_speed,
- (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
- ("full-duplex") : ("half-duplex\n"));
- else
- printf("Port %d Link Down\n",
- (uint8_t)portid);
- continue;
- }
- /* clear all_ports_up flag if any link down */
- if (link.link_status == ) {
- all_ports_up = ;
- break;
- }
- }
- /* after finally printing all link status, get out */
- if (print_flag == )
- break;
- if (all_ports_up == ) {
- printf(".");
- fflush(stdout);
- rte_delay_ms(CHECK_INTERVAL);
- }
- /* set the print_flag if all ports up or timeout */
- if (all_ports_up == || count == (MAX_CHECK_TIME - )) {
- print_flag = ;
- printf("done\n");
- }
- }
- }
- int //主函数
- main(int argc, char **argv)
- {
- struct lcore_conf *qconf;
- struct rte_eth_dev_info dev_info;
- struct rte_eth_txconf *txconf;
- int ret;
- unsigned nb_ports;
- uint16_t queueid;
- unsigned lcore_id;
- uint32_t n_tx_queue, nb_lcores;
- uint8_t portid, nb_rx_queue, queue, socketid;
- /* init EAL */
- ret = rte_eal_init(argc, argv); //初始化软件抽象层,并解析EAL有关参数
- if (ret < )
- rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
- argc -= ret; //减少参数个数
- argv += ret; //移动参数位置
- /* parse application arguments (after the EAL ones) */
- ret = parse_args(argc, argv); //解析l3fwd有关参数: -p -P --config
- if (ret < )
- rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
- if (check_lcore_params() < ) //检查lcore参数
- rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
- ret = init_lcore_rx_queues(); //初始化每个lcore上的rx queue数量
- if (ret < )
- rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
- nb_ports = rte_eth_dev_count(); //获取物理端口的个数
- if (nb_ports > RTE_MAX_ETHPORTS) //如果超过32个
- nb_ports = RTE_MAX_ETHPORTS;
- if (check_port_config(nb_ports) < ) //检查物理端口的配置
- rte_exit(EXIT_FAILURE, "check_port_config failed\n");
- nb_lcores = rte_lcore_count(); //获取启用的lcores的总个数
- /* initialize all ports 初始化所有的物理端口 */
- for (portid = ; portid < nb_ports; portid++) { //遍历所有的物理端口
- /* skip ports that are not enabled 跳过没有启用的物理端口 */
- if ((enabled_port_mask & ( << portid)) == ) {
- printf("\nSkipping disabled port %d\n", portid);
- continue;
- }
- /* init port 初始化物理端口*/
- printf("Initializing port %d ... ", portid );
- fflush(stdout); //清空标准输出(屏幕)的缓冲区,这样就能立即在屏幕上看到打印信息
- nb_rx_queue = get_port_n_rx_queues(portid); //获取portid上的接收队列的个数
- n_tx_queue = nb_lcores; //设定portid上的发送队列的个数为启用的lcores的个数
- if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) //如果发送队列的数量超过16个
- n_tx_queue = MAX_TX_QUEUE_PER_PORT;
- printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
- nb_rx_queue, (unsigned)n_tx_queue ); //这里是不是有点粗暴啊?????
- ret = rte_eth_dev_configure(portid, nb_rx_queue, //第一步,配置网络设备
- (uint16_t)n_tx_queue, &port_conf);
- if (ret < ) //如果配置设备失败
- rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
- ret, portid);
- rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); //记录mac地址到ports_eth_addr[portid]
- print_ethaddr(" Address:", &ports_eth_addr[portid]);
- printf(", ");
- /* 为每一个物理端口准备着源mac地址和目的mac地址
- * prepare dst and src MACs for each port.
- */
- *(uint64_t *)(val_eth + portid) =
- ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << );
- ether_addr_copy(&ports_eth_addr[portid], //前一个参数为from,后一个为to
- (struct ether_addr *)(val_eth + portid) + );
- /* init memory 分配内存并创建LPM或者hash */
- ret = init_mem(NB_MBUF); //mempool包含8192个元素
- if (ret < )
- rte_exit(EXIT_FAILURE, "init_mem failed\n");
- /*初始化一个发送队列成一对(lcore, port) init one TX queue per couple (lcore,port) */
- queueid = ;
- for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) { //遍历一个物理接口上的所有的lcores
- if (rte_lcore_is_enabled(lcore_id) == ) //忽略未启用的lcore
- continue;
- if (numa_on)//如果启用numa
- socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); //获取lcore_id所在的socketid
- else
- socketid = ;//默认socketid为0
- printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
- fflush(stdout);//清空标准输出(屏幕)的缓冲区
- rte_eth_dev_info_get(portid, &dev_info);//获取设备信息
- txconf = &dev_info.default_txconf;//得到发送的配置结构体指针
- if (port_conf.rxmode.jumbo_frame)
- txconf->txq_flags = ;
- ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, //第二步,建立发送队列
- socketid, txconf); //一个port上可能有多个queue,每个queue用一个lcore来绑定
- if (ret < )
- rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
- "port=%d\n", ret, portid);
- qconf = &lcore_conf[lcore_id]; //得到lcore_id的配置结构体指针
- qconf->tx_queue_id[portid] = queueid; //记录发送队列的编号到lcore_conf中
- queueid++; //发送队列的编号自增
- } //end of for(lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
- printf("\n");
- } //end of for(portid = 0; portid < nb_ports; portid++)
- for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) { //遍历所有的lcores
- if (rte_lcore_is_enabled(lcore_id) == )
- continue; //忽略未启用的lcore
- qconf = &lcore_conf[lcore_id];
- printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
- fflush(stdout);
- /* init RX queues 初始化接收队列 */
- for(queue = ; queue < qconf->n_rx_queue; ++queue) { //遍历所有的接收队列
- portid = qconf->rx_queue_list[queue].port_id; //物理端口的编号
- queueid = qconf->rx_queue_list[queue].queue_id;//接收队列的编号
- if (numa_on)//一般启用numa
- socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);//获取lcore_id所在的socketid
- else
- socketid = ;//默认socketid为0
- printf("rxq=%d,%d,%d ", portid, queueid, socketid);
- fflush(stdout);//清空标准输出(屏幕)的缓冲区
- ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, //第三步,建立接收队列
- socketid, //一个port上可能有多个queue,每个queue用一个lcore来绑定
- NULL,
- pktmbuf_pool[socketid]);
- if (ret < )
- rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d,"
- "port=%d\n", ret, portid);
- } //for(queue = 0; queue < qconf->n_rx_queue; ++queue)
- }//for(lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
- printf("\n");
- /* start ports 启动物理端口 */
- for (portid = ; portid < nb_ports; portid++) { //遍历所有的物理端口
- if ((enabled_port_mask & ( << portid)) == ) {
- continue; //忽略未启用的物理端口
- }
- /* Start device 启动设备 */
- ret = rte_eth_dev_start(portid); //第四步,启动物理端口
- if (ret < )
- rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
- ret, portid);
- /*
- * If enabled, put device in promiscuous mode.
- * This allows IO forwarding mode to forward packets
- * to itself through 2 cross-connected ports of the
- * target machine.
- */
- if (promiscuous_on) //如果开始混杂模式
- rte_eth_promiscuous_enable(portid); //启动混杂模式
- }//end of for (portid = 0; portid < nb_ports; portid++)
- check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
- /* launch per-lcore init on every lcore 在每一个lcore上至多启动一个线程 */
- rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);//CALL_MASTER表示在master也会启动线程
- RTE_LCORE_FOREACH_SLAVE(lcore_id) { //遍历每个slave lcore
- if (rte_eal_wait_lcore(lcore_id) < ) //等待线程结束
- return -;
- }
- return ;
- }
DPDK l3fwd的更多相关文章
- DPDK L3fwd 源码阅读
代码部分 整个L3fwd有三千多行代码,但总体思想就是在L2fwd的基础上,增加网络层的根据 IP 地址进行路由查找的内容. main.c 文件 int main(int argc, char **a ...
- Linux平台上DPDK入门指南
1. 简介 本文档包含DPDK软件安装和配置的相关说明.旨在帮助用户快速启动和运行软件.文档主要描述了在Linux环境下编译和 运行DPDK应用程序,但是文档并不深入DPDK的具体实现细节. 1.1. ...
- [dpdk] 熟悉SDK与初步使用 (四)(L3 Forwarding源码分析)
接续前节:[dpdk] 熟悉SDK与初步使用 (三)(IP Fragmentation源码分析) 前文中的最后一个问题,搁置,并没有找到答案.所以继续阅读其他例子的代码,想必定能在其他位置看到答案. ...
- [dpdk] 读官方文档(3)
续前节, 测试小程序 1. 想编译测试程序首先需要设置两个环境变量,为什么呢,因为测试程序的Makefile里用了... rpm装了打包好的devel包,这个rpm也会自带这两个环境变量.就是说写第三 ...
- [developmemt][dpdk] dpdk优化(转)
转发:https://software.intel.com/en-us/articles/dpdk-performance-optimization-guidelines-white-paper 转发 ...
- [daily][dpdk] 网卡offload识别包类型;如何模拟环境构造一个vlan包
第一部分 硬件识别包类型 网卡,是可以识别包类型的.在dpdk的API中.识别完之后,存在这个结构里: struct rte_mbuf { ...... union { uint32_t packet ...
- dpdk优化相关 转
注:本文是参照了一些其他文章,原文地址点击这里. 首先根据这篇文章进行了性能瓶颈的分析 策略与方法 首先根据木桶原理,首先要找到最弱的地方,怎么找往上看↑. 想能优化需要考虑如下: 优化BIOS设置 ...
- DPDK应用示例指南简介(汇总)
DPDK应用示例指南简介 <DPDK示例>系列文章主要是学习.记录.翻译DPDK官方示例文档.为了更好地理解和学习DPDK, 特通过对源码中的经典示例进行整理,供大家学习.交流和讨论. A ...
- Intel 推出 DPDK 开发包的意义是什么?
Intel 推出 DPDK 开发包的意义是什么? http://www.zhihu.com/question/27413080?sort=created 基于intel dpdk的包处理器,相较于基于 ...
随机推荐
- sql 循环表中记录
=========================================================================循环排序查询数据=================== ...
- Leetcode-Combinations Sum II
Given a collection of candidate numbers (C) and a target number (T), find all unique combinations in ...
- Leetcode-Convert Sorted Array to BST
Given an array where elements are sorted in ascending order, convert it to a height balanced BST. So ...
- 170228、Linux操作系统安装ELK stack日志管理系统--(1)Logstash和Filebeat的安装与使用
安装测试环境:Ubuntu 16.04.2 LTS 前言 (1)ELK是Elasticsearch,Logstash,Kibana 开源软件的集合,对外是作为一个日志管理系统的开源方案.它可以从任何来 ...
- SQL架构信息读取
--架构: select * from information_schema.SCHEMATA --表: select table_name from information_schema.table ...
- WebBrowser 控件-说明
WebBrowser.Document 为活动的文档返回自动化对象,引用 Microsoft HTML Object Library 可查看详细属性和方法 下面的解说假设窗体中有一个名称为 Web1 ...
- Maven 整合SSH框架之pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/20 ...
- JS改变HTML元素的绝对坐标
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DT ...
- 【Linux】Ubuntu下录屏&&制作GIF
在做Android的时候,想制作GIF用来演示效果.一番摸索.找到了一个简单可行的办法: App在模拟器中执行,用录屏软件录制.再将视频转成GIF. 系统: Ubuntu 15.04 录屏软件: Re ...
- 使用jQuery重用form表单并异步提交到其它action
在做页面开发的时候,有时候要重用表单的数据,并异步请求提交到其它的链接中,这个时候就能够使用jquery去改动表单的action值(记得使用后改动回来).并调用submit方法,当然后台的链接acti ...