l3fwd负责三层转发,比l2fwd要复杂点。

  1. /*-
  2. * BSD LICENSE
  3. *
  4. * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in
  15. * the documentation and/or other materials provided with the
  16. * distribution.
  17. * * Neither the name of Intel Corporation nor the names of its
  18. * contributors may be used to endorse or promote products derived
  19. * from this software without specific prior written permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. */
  33.  
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <stdint.h>
  37. #include <inttypes.h>
  38. #include <sys/types.h>
  39. #include <string.h>
  40. #include <sys/queue.h>
  41. #include <stdarg.h>
  42. #include <errno.h>
  43. #include <getopt.h>
  44.  
  45. #include <rte_common.h>
  46. #include <rte_vect.h>
  47. #include <rte_byteorder.h>
  48. #include <rte_log.h>
  49. #include <rte_memory.h>
  50. #include <rte_memcpy.h>
  51. #include <rte_memzone.h>
  52. #include <rte_eal.h>
  53. #include <rte_per_lcore.h>
  54. #include <rte_launch.h>
  55. #include <rte_atomic.h>
  56. #include <rte_cycles.h>
  57. #include <rte_prefetch.h>
  58. #include <rte_lcore.h>
  59. #include <rte_per_lcore.h>
  60. #include <rte_branch_prediction.h>
  61. #include <rte_interrupts.h>
  62. #include <rte_pci.h>
  63. #include <rte_random.h>
  64. #include <rte_debug.h>
  65. #include <rte_ether.h>
  66. #include <rte_ethdev.h>
  67. #include <rte_ring.h>
  68. #include <rte_mempool.h>
  69. #include <rte_mbuf.h>
  70. #include <rte_ip.h>
  71. #include <rte_tcp.h>
  72. #include <rte_udp.h>
  73. #include <rte_string_fns.h>
  74.  
  /* Route-lookup back ends that can be selected at build time. */
  #define APP_LOOKUP_EXACT_MATCH 0
  #define APP_LOOKUP_LPM 1
  /* Turns on the code guarded by #ifdef DO_RFC_1812_CHECKS. */
  #define DO_RFC_1812_CHECKS

  /* LPM routing is the default unless the build overrides APP_LOOKUP_METHOD. */
  #ifndef APP_LOOKUP_METHOD
  #define APP_LOOKUP_METHOD APP_LOOKUP_LPM
  #endif
  82.  
  /*
   * When set to zero, the simple forwarding path is enabled.
   * When set to one, the optimized forwarding path is enabled.
   * Note that the LPM optimisation path uses SSE4.1 instructions, so it is
   * only selected when the compiler defines __SSE4_1__.
   * Annotator's note: the test machine's CPU reports SSE 4.2; it was unclear
   * to the annotator whether that makes any difference here.
   */
  #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
  #define ENABLE_MULTI_BUFFER_OPTIMIZE 0
  #else
  #define ENABLE_MULTI_BUFFER_OPTIMIZE 1
  #endif
  94.  
  95. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  96. #include <rte_hash.h>
  97. #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  98. #include <rte_lpm.h>
  99. #include <rte_lpm6.h>
  100. #else
  101. #error "APP_LOOKUP_METHOD set to incorrect value"
  102. #endif
  103.  
  /*
   * printf helpers for a 16-byte IPv6 address: IPv6_BYTES_FMT is the format
   * string and IPv6_BYTES(addr) expands to the sixteen matching byte arguments.
   */
  #ifndef IPv6_BYTES
  #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
                         "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
  #define IPv6_BYTES(addr) \
      (addr)[0],  (addr)[1],  (addr)[2],  (addr)[3], \
      (addr)[4],  (addr)[5],  (addr)[6],  (addr)[7], \
      (addr)[8],  (addr)[9],  (addr)[10], (addr)[11], \
      (addr)[12], (addr)[13], (addr)[14], (addr)[15]
  #endif
  113.  
  /* Log type used by this application's RTE_LOG calls. */
  #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

  /* Upper bound for a jumbo frame length, in bytes. */
  #define MAX_JUMBO_PKT_LEN 9600

  /* Length of an IPv6 address, in bytes. */
  #define IPV6_ADDR_LEN 16

  /* Cache size term used in the NB_MBUF computation (per lcore). */
  #define MEMPOOL_CACHE_SIZE 256

  /* Size of one mbuf element: 2048 data bytes plus mbuf header and headroom. */
  #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
  123.  
  /*
   * This expression is used to calculate the number of mbufs needed depending on user input, taking
   * into account memory for rx and tx hardware rings, cache per lcore and mtable per port per lcore.
   * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum value of 8192.
   *
   * The macro reads nb_ports, nb_rx_queue, n_tx_queue and nb_lcores from the
   * scope in which it is expanded.
   */
  #define NB_MBUF RTE_MAX ( \
      (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
      nb_ports*nb_lcores*MAX_PKT_BURST + \
      nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
      nb_lcores*MEMPOOL_CACHE_SIZE), \
      (unsigned)8192)
  136.  
  /* Maximum number of packets buffered per port before a TX burst. */
  #define MAX_PKT_BURST 32
  #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

  /*
   * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
   */
  #define MAX_TX_BURST (MAX_PKT_BURST / 2)

  #define NB_SOCKETS 8

  /* Configure how many packets ahead to prefetch, when reading packets */
  #define PREFETCH_OFFSET 3

  /* Used to mark destination port as 'invalid'. */
  #define BAD_PORT ((uint16_t)-1)

  /* Number of packets handled per step; also the unroll factor in send_packetsx4. */
  #define FWDSTEP 4

  /*
   * Configurable number of RX/TX ring descriptors
   */
  #define RTE_TEST_RX_DESC_DEFAULT 128
  #define RTE_TEST_TX_DESC_DEFAULT 512
  static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
  static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
  162.  
  163. /* ethernet addresses of ports */
  164. static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
  165.  
  166. static __m128i val_eth[RTE_MAX_ETHPORTS];
  167.  
  168. /* replace first 12B of the ethernet header. */
  169. #define MASK_ETH 0x3f
  170.  
  171. /* mask of enabled ports */
  172. static uint32_t enabled_port_mask = ;
  173. static int promiscuous_on = ; /**< Ports set in promiscuous mode off by default. */
  174. static int numa_on = ; /**< NUMA is enabled by default. */
  175.  
  176. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  177. static int ipv6 = ; /**< ipv6 is false by default. */
  178. #endif
  179.  
  180. struct mbuf_table {
  181. uint16_t len; //实际个数???
  182. struct rte_mbuf *m_table[MAX_PKT_BURST];
  183. };
  184.  
  /* One (port, RX queue) pair polled by an lcore. */
  struct lcore_rx_queue {
      uint8_t port_id;   /* physical port number */
      uint8_t queue_id;  /* RX queue number on that port */
  } __rte_cache_aligned;
  189.  
  #define MAX_RX_QUEUE_PER_LCORE 16               /* at most 16 RX queues per lcore */
  #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS  /* TX queues per port, bounded by RTE_MAX_ETHPORTS */
  #define MAX_RX_QUEUE_PER_PORT 128               /* at most 128 RX queues per port */

  #define MAX_LCORE_PARAMS 1024
  /* One (port, queue, lcore) assignment: which lcore serves which RX queue. */
  struct lcore_params {
      uint8_t port_id;   /* physical port number */
      uint8_t queue_id;  /* RX queue number on that port */
      uint8_t lcore_id;  /* lcore that polls the queue */
  } __rte_cache_aligned;
  200.  
  201. static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];//最大1024
  202.  
  203. //此处可以修改lcore的默认配置
  204. static struct lcore_params lcore_params_array_default[] = {
  205. {, , },//物理端口的编号,网卡队列的编号,lcore的编号
  206. {, , },
  207. {, , },
  208. {, , },
  209. {, , },
  210. {, , },
  211. {, , },
  212. {, , },
  213. {, , },
  214. };
  215.  
  216. static struct lcore_params * lcore_params = lcore_params_array_default;
  217. static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
  218. sizeof(lcore_params_array_default[]);//默认值为9
  219.  
  220. static struct rte_eth_conf port_conf = {
  221. .rxmode = {
  222. .mq_mode = ETH_MQ_RX_RSS, //看起来l3fwd支持RSS哟
  223. .max_rx_pkt_len = ETHER_MAX_LEN,
  224. .split_hdr_size = ,
  225. .header_split = , /**< Header Split disabled */
  226. .hw_ip_checksum = , /**< IP checksum offload enabled */
  227. .hw_vlan_filter = , /**< VLAN filtering disabled */
  228. .jumbo_frame = , /**< Jumbo Frame Support disabled */
  229. .hw_strip_crc = , /**< CRC stripped by hardware */
  230. },
  231. .rx_adv_conf = {
  232. .rss_conf = {
  233. .rss_key = NULL,
  234. .rss_hf = ETH_RSS_IP,
  235. },
  236. },
  237. .txmode = {
  238. .mq_mode = ETH_MQ_TX_NONE,
  239. },
  240. };
  241.  
  242. static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
  243.  
  244. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  245. #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
  246. #include <rte_hash_crc.h>
  247. #define DEFAULT_HASH_FUNC rte_hash_crc
  248. #else
  249. #include <rte_jhash.h>
  250. #define DEFAULT_HASH_FUNC rte_jhash
  251. #endif
  /* IPv4 5-tuple used as the key of a static exact-match route entry. */
  struct ipv4_5tuple {
      uint32_t ip_dst;    /* destination IP address */
      uint32_t ip_src;    /* source IP address */
      uint16_t port_dst;  /* destination port */
      uint16_t port_src;  /* source port */
      uint8_t  proto;     /* transport-layer protocol */
  } __attribute__((__packed__));
  259. union ipv4_5tuple_host {
  260. struct {
  261. uint8_t pad0;
  262. uint8_t proto;
  263. uint16_t pad1;
  264. uint32_t ip_src;
  265. uint32_t ip_dst;
  266. uint16_t port_src;
  267. uint16_t port_dst;
  268. };
  269. __m128i xmm;
  270. };
  271.  
  272. #define XMM_NUM_IN_IPV6_5TUPLE 3
  273. struct ipv6_5tuple {
  274. uint8_t ip_dst[IPV6_ADDR_LEN];
  275. uint8_t ip_src[IPV6_ADDR_LEN];
  276. uint16_t port_dst;
  277. uint16_t port_src;
  278. uint8_t proto;
  279. } __attribute__((__packed__));
  280. union ipv6_5tuple_host {
  281. struct {
  282. uint16_t pad0;
  283. uint8_t proto;
  284. uint8_t pad1;
  285. uint8_t ip_src[IPV6_ADDR_LEN];
  286. uint8_t ip_dst[IPV6_ADDR_LEN];
  287. uint16_t port_src;
  288. uint16_t port_dst;
  289. uint64_t reserve;
  290. };
  291. __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
  292. };
  293. struct ipv4_l3fwd_route {
  294. struct ipv4_5tuple key;
  295. uint8_t if_out;
  296. };
  297. struct ipv6_l3fwd_route {
  298. struct ipv6_5tuple key; u
  299. int8_t if_out;
  300. };
  301. //这里设置默认的静态的三层转发路由规则,实际使用的时候需要修改这个地方
  302. static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
  303. {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
  304. {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
  305. {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
  306. {{IPv4(,,,), IPv4(,,,), , , IPPROTO_TCP}, },
  307. };
  308. static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
  309. {{ {0xfe, 0x80, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
  310. {0xfe, 0x80, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
  311. , , IPPROTO_TCP}, },
  312. {{ {0xfe, 0x90, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
  313. {0xfe, 0x90, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
  314. , , IPPROTO_TCP}, },
  315. {{ {0xfe, 0xa0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
  316. {0xfe, 0xa0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
  317. , , IPPROTO_TCP}, },
  318. {{ {0xfe, 0xb0, , , , , , , 0x02, 0x1e, 0x67, 0xff, 0xfe, , , },
  319. {0xfe, 0xb0, , , , , , , 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
  320. , , IPPROTO_TCP}, },
  321. };
  322. typedef struct rte_hash lookup_struct_t;
  323. static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
  324. static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
  325. #ifdef RTE_ARCH_X86_64
  326. /* default to 4 million hash entries (approx) */
  327. #define L3FWD_HASH_ENTRIES 1024*1024*4
  328. #else
  329. /* 32-bit has less address-space for hugepage memory, limit to 1M entries */
  330. #define L3FWD_HASH_ENTRIES 1024*1024*1
  331. #endif
  332. #define HASH_ENTRY_NUMBER_DEFAULT 4
  333. static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
  334. static inline uint32_tipv4_hash_crc(const void *data,
  335. __rte_unused uint32_t data_len, uint32_t init_val){
  336. const union ipv4_5tuple_host *k;
  337. uint32_t t; const uint32_t *p;
  338. k = data;
  339. t = k->proto;
  340. p = (const uint32_t *)&k->port_src;
  341. #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
  342. init_val = rte_hash_crc_4byte(t, init_val);
  343. init_val = rte_hash_crc_4byte(k->ip_src, init_val);
  344. init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
  345. init_val = rte_hash_crc_4byte(*p, init_val);
  346. #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
  347. init_val = rte_jhash_1word(t, init_val);
  348. init_val = rte_jhash_1word(k->ip_src, init_val);
  349. init_val = rte_jhash_1word(k->ip_dst, init_val);
  350. init_val = rte_jhash_1word(*p, init_val);
  351. #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
  352. return (init_val);
  353. }
  354. static inline uint32_tipv6_hash_crc(const void *data,
  355. __rte_unused uint32_t data_len, uint32_t init_val){
  356. const union ipv6_5tuple_host *k;
  357. uint32_t t;
  358. const uint32_t *p;
  359. #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
  360. const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
  361. const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
  362. #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
  363. k = data;
  364. t = k->proto;
  365. p = (const uint32_t *)&k->port_src;
  366. #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
  367. ip_src0 = (const uint32_t *) k->ip_src;
  368. ip_src1 = (const uint32_t *)(k->ip_src+);
  369. ip_src2 = (const uint32_t *)(k->ip_src+);
  370. ip_src3 = (const uint32_t *)(k->ip_src+);
  371. ip_dst0 = (const uint32_t *) k->ip_dst;
  372. ip_dst1 = (const uint32_t *)(k->ip_dst+);
  373. ip_dst2 = (const uint32_t *)(k->ip_dst+);
  374. ip_dst3 = (const uint32_t *)(k->ip_dst+);
  375. init_val = rte_hash_crc_4byte(t, init_val);
  376. init_val = rte_hash_crc_4byte(*ip_src0, init_val);
  377. init_val = rte_hash_crc_4byte(*ip_src1, init_val);
  378. init_val = rte_hash_crc_4byte(*ip_src2, init_val);
  379. init_val = rte_hash_crc_4byte(*ip_src3, init_val);
  380. init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
  381. init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
  382. init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
  383. init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
  384. init_val = rte_hash_crc_4byte(*p, init_val);
  385. #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
  386. init_val = rte_jhash_1word(t, init_val);
  387. init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
  388. init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
  389. init_val = rte_jhash_1word(*p, init_val);
  390. #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
  391. return (init_val);
  392. }
  393. #define IPV4_L3FWD_NUM_ROUTES \
  394. (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[]))
  395. #define IPV6_L3FWD_NUM_ROUTES \
  396. (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[]))
  397. static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
  398. static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
  399. #endif
  400.  
  401. #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  /* LPM IPv4 route: prefix, prefix length and output interface. */
  struct ipv4_l3fwd_route {
      uint32_t ip;      /* IPv4 prefix */
      uint8_t  depth;   /* prefix length in bits */
      uint8_t  if_out;  /* output interface for matching packets */
  };

  /* LPM IPv6 route: prefix, prefix length and output interface. */
  struct ipv6_l3fwd_route {
      uint8_t ip[16];   /* IPv6 prefix */
      uint8_t depth;    /* prefix length in bits */
      uint8_t if_out;   /* output interface for matching packets */
  };
  413.  
  414. //这里设置默认的静态的三层转发路由规则,实际使用的时候需要修改这个地方
  415. static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { //只有8个元素???
  416. {IPv4(,,,), , }, //{IPv4(192,168,10,0), 24, 0},
  417. {IPv4(,,,), , },
  418. {IPv4(,,,), , },
  419. {IPv4(,,,), , },
  420. {IPv4(,,,), , },
  421. {IPv4(,,,), , },
  422. {IPv4(,,,), , },
  423. {IPv4(,,,), , },
  424. };
  425.  
  426. static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
  427. {{,,,,,,,,,,,,,,,}, , },
  428. {{,,,,,,,,,,,,,,,}, , },
  429. {{,,,,,,,,,,,,,,,}, , },
  430. {{,,,,,,,,,,,,,,,}, , },
  431. {{,,,,,,,,,,,,,,,}, , },
  432. {{,,,,,,,,,,,,,,,}, , },
  433. {{,,,,,,,,,,,,,,,}, , },
  434. {{,,,,,,,,,,,,,,,}, , },
  435. };
  436.  
  /*
   * NOTE(review): the two tables below repeat the exact-match (5-tuple) route
   * tables inside the LPM section. Here they would redefine
   * ipv4_l3fwd_route_array / ipv6_l3fwd_route_array and their initialisers do
   * not match the LPM route structs, so they are kept for reference only and
   * excluded from compilation.
   */
  #if 0
  static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
      {{IPv4(101,0,0,0), IPv4(100,10,0,1), 101, 11, IPPROTO_TCP}, 0},
      {{IPv4(201,0,0,0), IPv4(200,20,0,1), 102, 12, IPPROTO_TCP}, 1},
      {{IPv4(111,0,0,0), IPv4(100,30,0,1), 101, 11, IPPROTO_TCP}, 2},
      {{IPv4(211,0,0,0), IPv4(200,40,0,1), 102, 12, IPPROTO_TCP}, 3},
  };

  static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
      {{
      {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
      {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
      101, 11, IPPROTO_TCP}, 0},

      {{
      {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
      {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
      102, 12, IPPROTO_TCP}, 1},

      {{
      {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
      {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
      101, 11, IPPROTO_TCP}, 2},

      {{
      {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
      {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
      102, 12, IPPROTO_TCP}, 3},
  };
  #endif /* reference copy of the exact-match tables */
  465.  
  466. #define IPV4_L3FWD_NUM_ROUTES \
  467. (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[]))
  468. #define IPV6_L3FWD_NUM_ROUTES \
  469. (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[]))
  470.  
  471. #define IPV4_L3FWD_LPM_MAX_RULES 1024
  472. #define IPV6_L3FWD_LPM_MAX_RULES 1024
  473. #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
  474.  
  475. typedef struct rte_lpm lookup_struct_t;
  476. typedef struct rte_lpm6 lookup6_struct_t;
  477. static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];//8个元素
  478. static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
  479. #endif
  480.  
  481. struct lcore_conf {//保存lcore的配置信息
  482. uint16_t n_rx_queue; //接收队列的总数量
  483. struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];//物理端口和网卡队列编号组成的数组
  484. uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; //发送队列的编号组成的数组
  485. struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];//mbuf表
  486. lookup_struct_t * ipv4_lookup_struct; //实际上就是struct rte_lpm *
  487. #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  488. lookup6_struct_t * ipv6_lookup_struct;
  489. #else
  490. lookup_struct_t * ipv6_lookup_struct;
  491. #endif
  492. } __rte_cache_aligned;
  493.  
  494. static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
  495.  
  496. /* Send burst of packets on an output interface */
  497. static inline int //在输出接口port上把数据包burst发送出去
  498. send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
  499. {
  500. struct rte_mbuf **m_table;
  501. int ret;
  502. uint16_t queueid;
  503.  
  504. queueid = qconf->tx_queue_id[port];
  505. m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
  506.  
  507. ret = rte_eth_tx_burst(port, queueid, m_table, n);
  508. if (unlikely(ret < n)) {
  509. do {
  510. rte_pktmbuf_free(m_table[ret]);
  511. } while (++ret < n);
  512. }
  513.  
  514. return ;
  515. }
  516.  
  517. /* Enqueue a single packet, and send burst if queue is filled */
  518. static inline int //发送一个mbuf
  519. send_single_packet(struct rte_mbuf *m, uint8_t port)
  520. {
  521. uint32_t lcore_id;
  522. uint16_t len;
  523. struct lcore_conf *qconf;
  524.  
  525. lcore_id = rte_lcore_id();
  526.  
  527. qconf = &lcore_conf[lcore_id];
  528. len = qconf->tx_mbufs[port].len;
  529. qconf->tx_mbufs[port].m_table[len] = m;
  530. len++;
  531.  
  532. /* enough pkts to be sent */
  533. if (unlikely(len == MAX_PKT_BURST)) { //如果累计到32个数据包
  534. send_burst(qconf, MAX_PKT_BURST, port); //把32个数据包发送出去
  535. len = ;
  536. }
  537.  
  538. qconf->tx_mbufs[port].len = len;
  539. return ;
  540. }
  541.  
  542. static inline __attribute__ void
  543. send_packetsx4(struct lcore_conf *qconf, uint8_t port,
  544. struct rte_mbuf *m[], uint32_t num)
  545. {
  546. uint32_t len, j, n;
  547.  
  548. len = qconf->tx_mbufs[port].len;
  549.  
  550. /* 如果某个队列的发送缓冲区为空,而且已有足够数量数据包待发送,那么立即发送
  551. * If TX buffer for that queue is empty, and we have enough packets,
  552. * then send them straightway.
  553. */
  554. if (num >= MAX_TX_BURST && len == ) {
  555. n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);//burst发送num个mbufs
  556. if (unlikely(n < num)) { //如果实际发送数据包的个数小于num
  557. do {
  558. rte_pktmbuf_free(m[n]); //把剩下的num-n个mbufs返回mempool
  559. } while (++n < num);
  560. }
  561. return;
  562. }
  563.  
  564. /*
  565. * Put packets into TX buffer for that queue.
  566. */
  567. //把那些数据包放到网卡队列的发送缓冲区中
  568. n = len + num;
  569. n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
  570.  
  571. j = ;
  572. switch (n % FWDSTEP) {
  573. while (j < n) {
  574. case :
  575. qconf->tx_mbufs[port].m_table[len + j] = m[j];
  576. j++;
  577. case :
  578. qconf->tx_mbufs[port].m_table[len + j] = m[j];
  579. j++;
  580. case :
  581. qconf->tx_mbufs[port].m_table[len + j] = m[j];
  582. j++;
  583. case :
  584. qconf->tx_mbufs[port].m_table[len + j] = m[j];
  585. j++;
  586. }
  587. }
  588.  
  589. len += n;
  590.  
  591. /*待发送的包数量达到32个 enough pkts to be sent */
  592. if (unlikely(len == MAX_PKT_BURST)) {
  593.  
  594. send_burst(qconf, MAX_PKT_BURST, port);
  595.  
  596. /* copy rest of the packets into the TX buffer. */
  597. len = num - n;
  598. j = ;
  599. switch (len % FWDSTEP) {
  600. while (j < len) {
  601. case :
  602. qconf->tx_mbufs[port].m_table[j] = m[n + j];
  603. j++;
  604. case :
  605. qconf->tx_mbufs[port].m_table[j] = m[n + j];
  606. j++;
  607. case :
  608. qconf->tx_mbufs[port].m_table[j] = m[n + j];
  609. j++;
  610. case :
  611. qconf->tx_mbufs[port].m_table[j] = m[n + j];
  612. j++;
  613. }
  614. }
  615. }
  616.  
  617. qconf->tx_mbufs[port].len = len;
  618. }
  619.  
  #ifdef DO_RFC_1812_CHECKS
  /*
   * Validate an IPv4 header against the RFC 1812 section 5.2.2 checks.
   *
   * pkt       IPv4 header to check
   * link_len  packet length reported by the link layer
   * returns   0 when the header passes; -1, -3, -4 or -5 identifying the
   *           failed check (check 2, the checksum, is left to hardware)
   */
  static inline int
  is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
  {
      /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
      /*
       * 1. The packet length reported by the Link Layer must be large
       * enough to hold the minimum length legal IP datagram (20 bytes).
       */
      if (link_len < sizeof(struct ipv4_hdr))
          return -1;

      /* 2. The IP checksum must be correct. */
      /* this is checked in H/W */

      /*
       * 3. The IP version number must be 4. If the version number is not 4
       * then the packet may be another version of IP, such as IPng or
       * ST-II.
       */
      if (((pkt->version_ihl) >> 4) != 4)
          return -3;
      /*
       * 4. The IP header length field must be large enough to hold the
       * minimum length legal IP datagram (20 bytes = 5 words).
       */
      if ((pkt->version_ihl & 0xf) < 5)
          return -4;

      /*
       * 5. The IP total length field must be large enough to hold the IP
       * datagram header, whose length is specified in the IP header length
       * field.
       */
      if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
          return -5;

      return 0;
  }
  #endif
  660.  
  661. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  662.  
  663. static __m128i mask0;
  664. static __m128i mask1;
  665. static __m128i mask2;
  666. static inline uint8_t //哈希情形下获取转发出口
  667. get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
  668. {
  669. int ret = ;
  670. union ipv4_5tuple_host key;
  671.  
  672. ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
  673. __m128i data = _mm_loadu_si128((__m128i*)(ipv4_hdr));
  674. /* Get 5 tuple: dst port, src port, dst IP address, src IP address and protocol */
  675. key.xmm = _mm_and_si128(data, mask0);
  676. /* Find destination port */
  677. ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
  678. return (uint8_t)((ret < )? portid : ipv4_l3fwd_out_if[ret]);
  679. }
  680.  
  681. static inline uint8_t
  682. get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup_struct_t * ipv6_l3fwd_lookup_struct)
  683. {
  684. int ret = ;
  685. union ipv6_5tuple_host key;
  686.  
  687. ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
  688. __m128i data0 = _mm_loadu_si128((__m128i*)(ipv6_hdr));
  689. __m128i data1 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)));
  690. __m128i data2 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)+sizeof(__m128i)));
  691. /* Get part of 5 tuple: src IP address lower 96 bits and protocol */
  692. key.xmm[] = _mm_and_si128(data0, mask1);
  693. /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address higher 32 bits */
  694. key.xmm[] = data1;
  695. /* Get part of 5 tuple: dst port and src port and dst IP address higher 32 bits */
  696. key.xmm[] = _mm_and_si128(data2, mask2);
  697.  
  698. /* Find destination port */
  699. ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
  700. return (uint8_t)((ret < )? portid : ipv6_l3fwd_out_if[ret]);
  701. }
  702. #endif
  703.  
  704. #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  705.  
  706. static inline uint8_t //LPM情形下获取ipv4数据包的目的端口
  707. get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
  708. {
  709. uint8_t next_hop;
  710.  
  711. return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
  712. rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
  713. &next_hop) == ) ? next_hop : portid);
  714. }
  715.  
  716. static inline uint8_t
  717. get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup6_struct_t * ipv6_l3fwd_lookup_struct)
  718. {
  719. uint8_t next_hop;
  720. return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
  721. ((struct ipv6_hdr*)ipv6_hdr)->dst_addr, &next_hop) == )?
  722. next_hop : portid);
  723. }
  724. #endif
  725.  
  /*
   * Forward declaration of the single-packet forwarding path; marked unused
   * so builds that never call it do not warn.
   */
  static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
      struct lcore_conf *qconf) __attribute__((unused));
  728.  
  729. #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
  730. (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
  731.  
  732. static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i mask1,
  733. union ipv6_5tuple_host * key)
  734. {
  735. __m128i tmpdata0 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
  736. + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)));
  737. __m128i tmpdata1 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
  738. + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
  739. + sizeof(__m128i)));
  740. __m128i tmpdata2 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
  741. + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
  742. + sizeof(__m128i) + sizeof(__m128i)));
  743. key->xmm[] = _mm_and_si128(tmpdata0, mask0);
  744. key->xmm[] = tmpdata1;
  745. key->xmm[] = _mm_and_si128(tmpdata2, mask1);
  746. return;
  747. }
  748.  
  749. static inline void
  750. simple_ipv4_fwd_4pkts(struct rte_mbuf* m[], uint8_t portid, struct lcore_conf *qconf)
  751. {
  752. struct ether_hdr *eth_hdr[];
  753. struct ipv4_hdr *ipv4_hdr[];
  754. void *d_addr_bytes[];
  755. uint8_t dst_port[];
  756. int32_t ret[];
  757. union ipv4_5tuple_host key[];
  758. __m128i data[];
  759.  
  760. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  761. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  762. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  763. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  764.  
  765. /* Handle IPv4 headers.*/
  766. ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  767. sizeof(struct ether_hdr));
  768. ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  769. sizeof(struct ether_hdr));
  770. ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  771. sizeof(struct ether_hdr));
  772. ipv4_hdr[] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  773. sizeof(struct ether_hdr));
  774.  
  775. #ifdef DO_RFC_1812_CHECKS
  776. /* Check to make sure the packet is valid (RFC1812) */
  777. uint8_t valid_mask = MASK_ALL_PKTS;
  778. if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
  779. rte_pktmbuf_free(m[]);
  780. valid_mask &= EXECLUDE_1ST_PKT;
  781. }
  782. if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
  783. rte_pktmbuf_free(m[]);
  784. valid_mask &= EXECLUDE_2ND_PKT;
  785. }
  786. if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
  787. rte_pktmbuf_free(m[]);
  788. valid_mask &= EXECLUDE_3RD_PKT;
  789. }
  790. if (is_valid_ipv4_pkt(ipv4_hdr[], m[]->pkt_len) < ) {
  791. rte_pktmbuf_free(m[]);
  792. valid_mask &= EXECLUDE_4TH_PKT;
  793. }
  794. if (unlikely(valid_mask != MASK_ALL_PKTS)) {
  795. if (valid_mask == ){
  796. return;
  797. } else {
  798. uint8_t i = ;
  799. for (i = ; i < ; i++) {
  800. if ((0x1 << i) & valid_mask) {
  801. l3fwd_simple_forward(m[i], portid, qconf);
  802. }
  803. }
  804. return;
  805. }
  806. }
  807. #endif // End of #ifdef DO_RFC_1812_CHECKS
  808.  
  809. data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
  810. sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
  811. data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
  812. sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
  813. data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
  814. sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
  815. data[] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[], unsigned char *) +
  816. sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
  817.  
  818. key[].xmm = _mm_and_si128(data[], mask0);
  819. key[].xmm = _mm_and_si128(data[], mask0);
  820. key[].xmm = _mm_and_si128(data[], mask0);
  821. key[].xmm = _mm_and_si128(data[], mask0);
  822.  
  823. const void *key_array[] = {&key[], &key[], &key[],&key[]};
  824. rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[], , ret);
  825. dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
  826. dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
  827. dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
  828. dst_port[] = (uint8_t) ((ret[] < ) ? portid : ipv4_l3fwd_out_if[ret[]]);
  829.  
  830. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  831. dst_port[] = portid;
  832. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  833. dst_port[] = portid;
  834. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  835. dst_port[] = portid;
  836. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  837. dst_port[] = portid;
  838.  
  839. /* 02:00:00:00:00:xx */
  840. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  841. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  842. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  843. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  844. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  845. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  846. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  847. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  848.  
  849. #ifdef DO_RFC_1812_CHECKS
  850. /* Update time to live and header checksum */
  851. --(ipv4_hdr[]->time_to_live);
  852. --(ipv4_hdr[]->time_to_live);
  853. --(ipv4_hdr[]->time_to_live);
  854. --(ipv4_hdr[]->time_to_live);
  855. ++(ipv4_hdr[]->hdr_checksum);
  856. ++(ipv4_hdr[]->hdr_checksum);
  857. ++(ipv4_hdr[]->hdr_checksum);
  858. ++(ipv4_hdr[]->hdr_checksum);
  859. #endif
  860.  
  861. /* src addr */
  862. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  863. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  864. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  865. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  866.  
  867. send_single_packet(m[], (uint8_t)dst_port[]);
  868. send_single_packet(m[], (uint8_t)dst_port[]);
  869. send_single_packet(m[], (uint8_t)dst_port[]);
  870. send_single_packet(m[], (uint8_t)dst_port[]);
  871.  
  872. }
  873.  
  874. #define MASK_ALL_PKTS 0xf
  875. #define EXECLUDE_1ST_PKT 0xe
  876. #define EXECLUDE_2ND_PKT 0xd
  877. #define EXECLUDE_3RD_PKT 0xb
  878. #define EXECLUDE_4TH_PKT 0x7
  879.  
  880. static inline void
  881. simple_ipv6_fwd_4pkts(struct rte_mbuf* m[], uint8_t portid, struct lcore_conf *qconf)
  882. {
  883. struct ether_hdr *eth_hdr[];
  884. __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[];
  885. void *d_addr_bytes[];
  886. uint8_t dst_port[];
  887. int32_t ret[];
  888. union ipv6_5tuple_host key[];
  889.  
  890. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  891. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  892. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  893. eth_hdr[] = rte_pktmbuf_mtod(m[], struct ether_hdr *);
  894.  
  895. /* Handle IPv6 headers.*/
  896. ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  897. sizeof(struct ether_hdr));
  898. ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  899. sizeof(struct ether_hdr));
  900. ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  901. sizeof(struct ether_hdr));
  902. ipv6_hdr[] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[], unsigned char *) +
  903. sizeof(struct ether_hdr));
  904.  
  905. get_ipv6_5tuple(m[], mask1, mask2, &key[]);
  906. get_ipv6_5tuple(m[], mask1, mask2, &key[]);
  907. get_ipv6_5tuple(m[], mask1, mask2, &key[]);
  908. get_ipv6_5tuple(m[], mask1, mask2, &key[]);
  909.  
  910. const void *key_array[] = {&key[], &key[], &key[],&key[]};
  911. rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[], , ret);
  912. dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
  913. dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
  914. dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
  915. dst_port[] = (uint8_t) ((ret[] < )? portid:ipv6_l3fwd_out_if[ret[]]);
  916.  
  917. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  918. dst_port[] = portid;
  919. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  920. dst_port[] = portid;
  921. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  922. dst_port[] = portid;
  923. if (dst_port[] >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port[]) == )
  924. dst_port[] = portid;
  925.  
  926. /* 02:00:00:00:00:xx */
  927. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  928. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  929. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  930. d_addr_bytes[] = &eth_hdr[]->d_addr.addr_bytes[];
  931. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  932. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  933. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  934. *((uint64_t *)d_addr_bytes[]) = 0x000000000002 + ((uint64_t)dst_port[] << );
  935.  
  936. /* src addr */
  937. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  938. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  939. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  940. ether_addr_copy(&ports_eth_addr[dst_port[]], &eth_hdr[]->s_addr);
  941.  
  942. send_single_packet(m[], (uint8_t)dst_port[]);
  943. send_single_packet(m[], (uint8_t)dst_port[]);
  944. send_single_packet(m[], (uint8_t)dst_port[]);
  945. send_single_packet(m[], (uint8_t)dst_port[]);
  946.  
  947. }
  948. #endif /* APP_LOOKUP_METHOD */
  949.  
  950. static inline __attribute__ void //简单三层转发,没有使用SSE4.1优化
  951. l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf)
  952. {
  953. struct ether_hdr *eth_hdr;
  954. struct ipv4_hdr *ipv4_hdr;
  955. void *d_addr_bytes;
  956. uint8_t dst_port;
  957.  
  958. eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); //得到eth_hdr指针
  959.  
  960. if (m->ol_flags & PKT_RX_IPV4_HDR) { //如果是ipv4包
  961. /* Handle IPv4 headers.*/
  962. ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
  963. sizeof(struct ether_hdr));
  964.  
  965. #ifdef DO_RFC_1812_CHECKS
  966. /* Check to make sure the packet is valid (RFC1812) */
  967. if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < ) {
  968. rte_pktmbuf_free(m);
  969. return;
  970. }
  971. #endif
  972. //想要满足文生提出的需求,主要在这里修改ip层和tcp层的数据内容。
  973. dst_port = get_ipv4_dst_port(ipv4_hdr, portid, //获取转发出口
  974. qconf->ipv4_lookup_struct);
  975. if (dst_port >= RTE_MAX_ETHPORTS ||
  976. (enabled_port_mask & << dst_port) == )
  977. dst_port = portid; //出错则直接把入口作为转发出口
  978.  
  979. /* 02:00:00:00:00:xx 这里是修改目的mac地址吗??? */
  980. d_addr_bytes = &eth_hdr->d_addr.addr_bytes[];
  981. *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR +
  982. ((uint64_t)dst_port << );
  983.  
  984. #ifdef DO_RFC_1812_CHECKS
  985. /* Update time to live and header checksum */
  986. --(ipv4_hdr->time_to_live);
  987. ++(ipv4_hdr->hdr_checksum);
  988. #endif
  989.  
  990. /* //把进入包的目的mac地址作为转发包的源地址 src addr */
  991. ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
  992.  
  993. send_single_packet(m, dst_port); //经过dst_port把转发包发送出去
  994.  
  995. } else { //如果是ipv6包
  996. /* Handle IPv6 headers.*/
  997. struct ipv6_hdr *ipv6_hdr;
  998.  
  999. ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
  1000. sizeof(struct ether_hdr));
  1001.  
  1002. dst_port = get_ipv6_dst_port(ipv6_hdr, portid, qconf->ipv6_lookup_struct);
  1003.  
  1004. if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & << dst_port) == )
  1005. dst_port = portid;
  1006.  
  1007. /* 02:00:00:00:00:xx */
  1008. d_addr_bytes = &eth_hdr->d_addr.addr_bytes[];
  1009. *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR +
  1010. ((uint64_t)dst_port << );
  1011.  
  1012. /* src addr */
  1013. ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
  1014.  
  1015. send_single_packet(m, dst_port);
  1016. }
  1017.  
  1018. }
  1019.  
  1020. #ifdef DO_RFC_1812_CHECKS
  1021.  
  1022. #define IPV4_MIN_VER_IHL 0x45
  1023. #define IPV4_MAX_VER_IHL 0x4f
  1024. #define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
  1025.  
  1026. /* Minimum value of IPV4 total length (20B) in network byte order. */
  1027. #define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8)
  1028.  
  1029. /*
  1030. * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
  1031. * - The IP version number must be 4.
  1032. * - The IP header length field must be large enough to hold the
  1033. * minimum length legal IP datagram (20 bytes = 5 words).
  1034. * - The IP total length field must be large enough to hold the IP
  1035. * datagram header, whose length is specified in the IP header length
  1036. * field.
  1037. * If we encounter invalid IPV4 packet, then set destination port for it
  1038. * to BAD_PORT value.
  1039. */
  1040. static inline __attribute__ void //ipv4错误检查
  1041. rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t flags)
  1042. {
  1043. uint8_t ihl;
  1044.  
  1045. if ((flags & PKT_RX_IPV4_HDR) != ) {//如果是ipv4
  1046.  
  1047. ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;
  1048.  
  1049. ipv4_hdr->time_to_live--;
  1050. ipv4_hdr->hdr_checksum++;
  1051.  
  1052. if (ihl > IPV4_MAX_VER_IHL_DIFF ||
  1053. ((uint8_t)ipv4_hdr->total_length == &&
  1054. ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) {
  1055. dp[] = BAD_PORT; //应该是出错了
  1056. }
  1057. }
  1058. }
  1059.  
  1060. #else
  1061. #define rfc1812_process(mb, dp) do { } while (0)
  1062. #endif /* DO_RFC_1812_CHECKS */
  1063.  
  1064. #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
  1065. (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
  1066.  
  1067. static inline __attribute__ uint16_t //得到目的ip地址对应的转发出口
  1068. get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
  1069. uint32_t dst_ipv4, uint8_t portid)
  1070. {
  1071. uint8_t next_hop;
  1072. struct ipv6_hdr *ipv6_hdr;
  1073. struct ether_hdr *eth_hdr;
  1074.  
  1075. if (pkt->ol_flags & PKT_RX_IPV4_HDR) { //如果都是ipv4
  1076. if (rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4,
  1077. &next_hop) != ) //返回0则查找到,next_hop中已经得到下一跳
  1078. next_hop = portid; //此时没找到,则直接把portid设定为下一跳
  1079. } else if (pkt->ol_flags & PKT_RX_IPV6_HDR) { //如果都是ipv6
  1080. eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
  1081. ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + );
  1082. if (rte_lpm6_lookup(qconf->ipv6_lookup_struct,
  1083. ipv6_hdr->dst_addr, &next_hop) != )
  1084. next_hop = portid;
  1085. } else { //如果有其他种类的数据包
  1086. next_hop = portid;//设定下一跳
  1087. }
  1088.  
  1089. return next_hop;//返回下一跳
  1090. }
  1091.  
  1092. static inline void //处理一个数据包
  1093. process_packet(struct lcore_conf *qconf, struct rte_mbuf *pkt,
  1094. uint16_t *dst_port, uint8_t portid)
  1095. {
  1096. struct ether_hdr *eth_hdr;
  1097. struct ipv4_hdr *ipv4_hdr;
  1098. uint32_t dst_ipv4;
  1099. uint16_t dp;
  1100. __m128i te, ve;
  1101.  
  1102. eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);//获取eth首部
  1103. ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );//获取ipv4首部
  1104.  
  1105. dst_ipv4 = ipv4_hdr->dst_addr; //得到大端的ipv4目的地址
  1106. dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);//转换成小端
  1107. dp = get_dst_port(qconf, pkt, dst_ipv4, portid); //获取转发出口/下一跳
  1108.  
  1109. te = _mm_load_si128((__m128i *)eth_hdr);
  1110. ve = val_eth[dp];
  1111.  
  1112. dst_port[] = dp;
  1113. rfc1812_process(ipv4_hdr, dst_port, pkt->ol_flags);
  1114.  
  1115. te = _mm_blend_epi16(te, ve, MASK_ETH);
  1116. _mm_store_si128((__m128i *)eth_hdr, te);
  1117. }
  1118.  
  1119. /* 从4个mbufs中读取目的IP地址和ol_flags
  1120. * Read ol_flags and destination IPV4 addresses from 4 mbufs.
  1121. */
  1122. static inline void
  1123. processx4_step1(struct rte_mbuf *pkt[FWDSTEP], __m128i *dip, uint32_t *flag)
  1124. {
  1125. struct ipv4_hdr *ipv4_hdr;
  1126. struct ether_hdr *eth_hdr;
  1127. uint32_t x0, x1, x2, x3;
  1128. //第一个mbuf
  1129. eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);//得到eth_hdr
  1130. ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );//得到ipv4_hdr
  1131. x0 = ipv4_hdr->dst_addr;//得到dst_addr
  1132. flag[] = pkt[]->ol_flags & PKT_RX_IPV4_HDR;
  1133. //第二个mbuf
  1134. eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
  1135. ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
  1136. x1 = ipv4_hdr->dst_addr;
  1137. flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
  1138. //第三个mbuf
  1139. eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
  1140. ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
  1141. x2 = ipv4_hdr->dst_addr;
  1142. flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
  1143. //第四个mbuf
  1144. eth_hdr = rte_pktmbuf_mtod(pkt[], struct ether_hdr *);
  1145. ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + );
  1146. x3 = ipv4_hdr->dst_addr;
  1147. flag[] &= pkt[]->ol_flags; //与前一个mbuf标志做&运算
  1148.  
  1149. dip[] = _mm_set_epi32(x3, x2, x1, x0);//把4个dst_addr合并为128位的寄存器
  1150. }
  1151.  
  1152. /*
  1153. * Lookup into LPM for destination port.
  1154. * If lookup fails, use incoming port (portid) as destination port.
  1155. */ //在LPM中查找转发出口/下一跳,如果没有找到则把入口作为转发出口
  1156. static inline void
  1157. processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag,
  1158. uint8_t portid, struct rte_mbuf *pkt[FWDSTEP], uint16_t dprt[FWDSTEP])
  1159. {
  1160. rte_xmm_t dst;
  1161. const __m128i bswap_mask = _mm_set_epi8(, , , , , , , ,
  1162. , , , , , , , ); //表示重新排列的顺序
  1163.  
  1164. /* Byte swap 4 IPV4 addresses. 按照字节交换ipv4地址 */
  1165. dip = _mm_shuffle_epi8(dip, bswap_mask);
  1166.  
  1167. /* 如果4个分组都是ipv4的 if all 4 packets are IPV4. */
  1168. if (likely(flag != )) {
  1169. rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dprt, portid);
  1170. } else {
  1171. dst.x = dip; //获取4个目的ip地址
  1172. dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);//得到下一跳/转发出口
  1173. dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
  1174. dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
  1175. dprt[] = get_dst_port(qconf, pkt[], dst.u32[], portid);
  1176. }
  1177. }
  1178.  
  1179. /*
  1180. * Update source and destination MAC addresses in the ethernet header.
  1181. * Perform RFC1812 checks and updates for IPV4 packets.
  1182. */ //更新目的mac和源mac地址
  1183. static inline void
  1184. processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
  1185. {
  1186. __m128i te[FWDSTEP];
  1187. __m128i ve[FWDSTEP];
  1188. __m128i *p[FWDSTEP];
  1189.  
  1190. p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));//指向第一个数据包的内容
  1191. p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
  1192. p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
  1193. p[] = (rte_pktmbuf_mtod(pkt[], __m128i *));
  1194.  
  1195. ve[] = val_eth[dst_port[]];
  1196. te[] = _mm_load_si128(p[]);//将p[0]指向的内容加载到128位寄存器中
  1197.  
  1198. ve[] = val_eth[dst_port[]];
  1199. te[] = _mm_load_si128(p[]);
  1200.  
  1201. ve[] = val_eth[dst_port[]];
  1202. te[] = _mm_load_si128(p[]);
  1203.  
  1204. ve[] = val_eth[dst_port[]];
  1205. te[] = _mm_load_si128(p[]);
  1206.  
  1207. /*替换更新前12个字节,保留剩余 Update first 12 bytes, keep rest bytes intact. */
  1208. te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
  1209. te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
  1210. te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
  1211. te[] = _mm_blend_epi16(te[], ve[], MASK_ETH);
  1212.  
  1213. _mm_store_si128(p[], te[]);
  1214. _mm_store_si128(p[], te[]);
  1215. _mm_store_si128(p[], te[]);
  1216. _mm_store_si128(p[], te[]);
  1217.  
  1218. rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
  1219. &dst_port[], pkt[]->ol_flags);
  1220. rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
  1221. &dst_port[], pkt[]->ol_flags);
  1222. rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
  1223. &dst_port[], pkt[]->ol_flags);
  1224. rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[] + ),
  1225. &dst_port[], pkt[]->ol_flags);
  1226. }
  1227.  
  1228. /* //把转发出口相同的连续数据包做一次burst发送
  1229. 为了避免额外的延迟,与其他的包处理一起完成,但在对转发出口做了决策之后。
  1230.  
  1231. * We group consecutive packets with the same destionation port into one burst.
  1232. * To avoid extra latency this is done together with some other packet
  1233. * processing, but after we made a final decision about packet's destination.
  1234. * To do this we maintain:
  1235. * pnum - array of number of consecutive packets with the same dest port for
  1236. * each packet in the input burst. ***pnum是保存转发出口相同的连续数据包的数组
  1237. * lp - pointer to the last updated element in the pnum. ***lp指向pnum中最后一次更新的元素
  1238. * dlp - dest port value lp corresponds to. ***dlp为lp对应的转发出口编号
  1239. */
  1240.  
  1241. #define GRPSZ (1 << FWDSTEP) //
  1242. #define GRPMSK (GRPSZ - 1) //
  1243.  
  1244. #define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
  1245. if (likely((dlp) == (dcp)[(idx)])) { \
  1246. (lp)[]++; \
  1247. } else { \
  1248. (dlp) = (dcp)[idx]; \
  1249. (lp) = (pn) + (idx); \
  1250. (lp)[] = ; \
  1251. } \
  1252. } while ()
  1253.  
  1254. /*
  1255. * Group consecutive packets with the same destination port in bursts of 4.
  1256. * Suppose we have array of destionation ports:
  1257. * dst_port[] = {a, b, c, d,, e, ... }
  1258. * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
  1259. * We doing 4 comparisions at once and the result is 4 bit mask.
  1260. * This mask is used as an index into prebuild array of pnum values.
  1261. */
  1262. static inline uint16_t * //把出口相同的4个数据包构成一组
  1263. port_groupx4(uint16_t pn[FWDSTEP + ], uint16_t *lp, __m128i dp1, __m128i dp2)
  1264. {
  1265. static const struct {
  1266. uint64_t pnum; /*为pnum预设的4个值 prebuild 4 values for pnum[]. */
  1267. int32_t idx; /*最后一次更新的元素的索引 index for new last updated elemnet. */
  1268. uint16_t lpv; /*把值加到最后一次更新的元素 add value to the last updated element. */
  1269. } gptbl[GRPSZ] = {
  1270. {
  1271. /* 0: a != b, b != c, c != d, d != e */
  1272. .pnum = UINT64_C(0x0001000100010001),
  1273. .idx = ,
  1274. .lpv = ,
  1275. },
  1276. {
  1277. /* 1: a == b, b != c, c != d, d != e */
  1278. .pnum = UINT64_C(0x0001000100010002),
  1279. .idx = ,
  1280. .lpv = ,
  1281. },
  1282. {
  1283. /* 2: a != b, b == c, c != d, d != e */
  1284. .pnum = UINT64_C(0x0001000100020001),
  1285. .idx = ,
  1286. .lpv = ,
  1287. },
  1288. {
  1289. /* 3: a == b, b == c, c != d, d != e */
  1290. .pnum = UINT64_C(0x0001000100020003),
  1291. .idx = ,
  1292. .lpv = ,
  1293. },
  1294. {
  1295. /* 4: a != b, b != c, c == d, d != e */
  1296. .pnum = UINT64_C(0x0001000200010001),
  1297. .idx = ,
  1298. .lpv = ,
  1299. },
  1300. {
  1301. /* 5: a == b, b != c, c == d, d != e */
  1302. .pnum = UINT64_C(0x0001000200010002),
  1303. .idx = ,
  1304. .lpv = ,
  1305. },
  1306. {
  1307. /* 6: a != b, b == c, c == d, d != e */
  1308. .pnum = UINT64_C(0x0001000200030001),
  1309. .idx = ,
  1310. .lpv = ,
  1311. },
  1312. {
  1313. /* 7: a == b, b == c, c == d, d != e */
  1314. .pnum = UINT64_C(0x0001000200030004),
  1315. .idx = ,
  1316. .lpv = ,
  1317. },
  1318. {
  1319. /* 8: a != b, b != c, c != d, d == e */
  1320. .pnum = UINT64_C(0x0002000100010001),
  1321. .idx = ,
  1322. .lpv = ,
  1323. },
  1324. {
  1325. /* 9: a == b, b != c, c != d, d == e */
  1326. .pnum = UINT64_C(0x0002000100010002),
  1327. .idx = ,
  1328. .lpv = ,
  1329. },
  1330. {
  1331. /* 0xa: a != b, b == c, c != d, d == e */
  1332. .pnum = UINT64_C(0x0002000100020001),
  1333. .idx = ,
  1334. .lpv = ,
  1335. },
  1336. {
  1337. /* 0xb: a == b, b == c, c != d, d == e */
  1338. .pnum = UINT64_C(0x0002000100020003),
  1339. .idx = ,
  1340. .lpv = ,
  1341. },
  1342. {
  1343. /* 0xc: a != b, b != c, c == d, d == e */
  1344. .pnum = UINT64_C(0x0002000300010001),
  1345. .idx = ,
  1346. .lpv = ,
  1347. },
  1348. {
  1349. /* 0xd: a == b, b != c, c == d, d == e */
  1350. .pnum = UINT64_C(0x0002000300010002),
  1351. .idx = ,
  1352. .lpv = ,
  1353. },
  1354. {
  1355. /* 0xe: a != b, b == c, c == d, d == e */
  1356. .pnum = UINT64_C(0x0002000300040001),
  1357. .idx = ,
  1358. .lpv = ,
  1359. },
  1360. {
  1361. /* 0xf: a == b, b == c, c == d, d == e */
  1362. .pnum = UINT64_C(0x0002000300040005),
  1363. .idx = ,
  1364. .lpv = ,
  1365. },
  1366. };
  1367.  
  1368. union {
  1369. uint16_t u16[FWDSTEP + ];
  1370. uint64_t u64;
  1371. } *pnum = (void *)pn;
  1372.  
  1373. int32_t v;
  1374.  
  1375. dp1 = _mm_cmpeq_epi16(dp1, dp2); //按照16位一个单元来比较dp1和dp2
  1376. dp1 = _mm_unpacklo_epi16(dp1, dp1); //按照16位一个单元将dp1与dp1来结合
  1377. v = _mm_movemask_ps((__m128)dp1); //根据dp1的4个值形成4个位的掩码
  1378.  
  1379. /*更新最后一次端口计数 update last port counter. */
  1380. lp[] += gptbl[v].lpv;
  1381.  
  1382. /*如果转发出口的值已经改变 if dest port value has changed. */
  1383. if (v != GRPMSK) {
  1384. lp = pnum->u16 + gptbl[v].idx;
  1385. lp[] = ;
  1386. pnum->u64 = gptbl[v].pnum;
  1387. }
  1388.  
  1389. return lp;
  1390. }
  1391.  
  1392. #endif /* APP_LOOKUP_METHOD */
  1393.  
  1394. /* 线程执行函数 main processing loop */
  1395. static int
  1396. main_loop(__attribute__((unused)) void *dummy)
  1397. {
  1398. struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; //32个指针构成的数组
  1399. unsigned lcore_id;
  1400. uint64_t prev_tsc, diff_tsc, cur_tsc;
  1401. int i, j, nb_rx;
  1402. uint8_t portid, queueid;
  1403. struct lcore_conf *qconf;
  1404. const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - ) /
  1405. US_PER_S * BURST_TX_DRAIN_US;
  1406.  
  1407. #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
  1408. (ENABLE_MULTI_BUFFER_OPTIMIZE == ))
  1409. int32_t k;
  1410. uint16_t dlp; //dlp为lp对应的转发出口编号
  1411. uint16_t *lp; //lp指向pkts_burst中最后一次更新的元素
  1412. uint16_t dst_port[MAX_PKT_BURST]; //dst_port是32个数据包的转发出口构成的数组
  1413. __m128i dip[MAX_PKT_BURST / FWDSTEP]; //数据包的目的IP地址构成的数组
  1414. uint32_t flag[MAX_PKT_BURST / FWDSTEP];
  1415. uint16_t pnum[MAX_PKT_BURST + ]; //转发出口相同的数据包的编号
  1416. #endif
  1417.  
  1418. prev_tsc = ;
  1419.  
  1420. lcore_id = rte_lcore_id(); //获取lcore_id
  1421. qconf = &lcore_conf[lcore_id];//获取lcore_id的配置信息
  1422.  
  1423. if (qconf->n_rx_queue == ) { //如果lcore上没有接收队列
  1424. RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
  1425. return ;
  1426. }
  1427.  
  1428. RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
  1429.  
  1430. for (i = ; i < qconf->n_rx_queue; i++) { //遍历所有的接收队列
  1431.  
  1432. portid = qconf->rx_queue_list[i].port_id; //得到物理端口的编号
  1433. queueid = qconf->rx_queue_list[i].queue_id; //得到网卡队列的编号
  1434. RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id,
  1435. portid, queueid);
  1436. }
  1437.  
  1438. while () { //死循环,体现PMD思想
  1439.  
  1440. cur_tsc = rte_rdtsc();
  1441.  
  1442. /*
  1443. * TX burst queue drain
  1444. */
  1445. diff_tsc = cur_tsc - prev_tsc; //计算时间差
  1446. if (unlikely(diff_tsc > drain_tsc)) { //如果两次时间差大于定值
  1447.  
  1448. /*
  1449. * This could be optimized (use queueid instead of
  1450. * portid), but it is not called so often
  1451. */
  1452. for (portid = ; portid < RTE_MAX_ETHPORTS; portid++) {//遍历所有的物理端口
  1453. if (qconf->tx_mbufs[portid].len == )
  1454. continue;
  1455. send_burst(qconf,
  1456. qconf->tx_mbufs[portid].len,
  1457. portid);
  1458. qconf->tx_mbufs[portid].len = ;
  1459. }
  1460.  
  1461. prev_tsc = cur_tsc; //记下前一时间
  1462. }
  1463.  
  1464. /* 从接收队列中读取数据包
  1465. * Read packet from RX queues
  1466. */
  1467. for (i = ; i < qconf->n_rx_queue; ++i) { //遍历所有的接收队列
  1468. portid = qconf->rx_queue_list[i].port_id;//得到物理端口的编号
  1469. queueid = qconf->rx_queue_list[i].queue_id; //得到网卡队列的编号
  1470. nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
  1471. MAX_PKT_BURST); //在每个队列上尽量接收32个数据包,用nb_rx记录实际个数
  1472. if (nb_rx == ) //如果一个包也没有收到
  1473. continue;
  1474.  
  1475. #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1) //如果支持Intel SSE4.1特性
  1476. if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) //如果使用lpm
  1477.  
  1478. k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); //整除4
  1479. for (j = ; j != k; j += FWDSTEP) { //每次处理4个mbufs
  1480. processx4_step1(&pkts_burst[j], //从4个mbufs中读取目的ip地址和ol_flags
  1481. &dip[j / FWDSTEP],
  1482. &flag[j / FWDSTEP]);
  1483. }
  1484.  
  1485. k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
  1486. for (j = ; j != k; j += FWDSTEP) {//每次处理4个mbufs
  1487. processx4_step2(qconf, dip[j / FWDSTEP], //在LPM中查找转发出口,如果失败则把进入的端口作为转发出口
  1488. flag[j / FWDSTEP], portid,
  1489. &pkts_burst[j], &dst_port[j]);
  1490. }
  1491.  
  1492. /* 完成包处理,并根据相同的转发出口来分组连续的数据包
  1493. * Finish packet processing and group consecutive
  1494. * packets with the same destination port.
  1495. */
  1496. k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);//处理成4的幂
  1497. if (k != ) {
  1498. __m128i dp1, dp2;
  1499.  
  1500. lp = pnum;
  1501. lp[] = ;
  1502.  
  1503. processx4_step3(pkts_burst, dst_port); //更新目的mac和源mac地址
  1504.  
  1505. /* dp1: <d[0], d[1], d[2], d[3], ... > */
  1506. dp1 = _mm_loadu_si128((__m128i *)dst_port); //把目的端口加载到寄存器dp1中
  1507.  
  1508. for (j = FWDSTEP; j != k; j += FWDSTEP) { //每次处理4个mbufs
  1509. processx4_step3(&pkts_burst[j], //更新目的mac和源mac地址
  1510. &dst_port[j]);
  1511.  
  1512. /*
  1513. * dp2:
  1514. * <d[j-3], d[j-2], d[j-1], d[j], ... >
  1515. */
  1516. dp2 = _mm_loadu_si128((__m128i *) //返回一个__m128i的寄存器
  1517. &dst_port[j - FWDSTEP + ]);
  1518. lp = port_groupx4(&pnum[j - FWDSTEP], //把出口相同的4个数据包构成一组
  1519. lp, dp1, dp2);
  1520.  
  1521. /*
  1522. * dp1:
  1523. * <d[j], d[j+1], d[j+2], d[j+3], ... >
  1524. */
  1525. dp1 = _mm_srli_si128(dp2, //逻辑左移3*16位,返回一个__m128i的寄存器
  1526. (FWDSTEP - ) *
  1527. sizeof(dst_port[]));
  1528. }
  1529.  
  1530. /*
  1531. * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
  1532. */
  1533. dp2 = _mm_shufflelo_epi16(dp1, 0xf9); //重新排序,返回一个__m128i的寄存器
  1534. lp = port_groupx4(&pnum[j - FWDSTEP], lp, //把4个连续分组按照目的端口分组
  1535. dp1, dp2);
  1536.  
  1537. /*
  1538. * remove values added by the last repeated
  1539. * dst port.
  1540. */
  1541. lp[]--;
  1542. dlp = dst_port[j - ];
  1543. } else {
  1544. /* set dlp and lp to the never used values. */
  1545. dlp = BAD_PORT - ;
  1546. lp = pnum + MAX_PKT_BURST;
  1547. }
  1548.  
  1549. /*处理最后的三个分组 Process up to last 3 packets one by one. */
  1550. switch (nb_rx % FWDSTEP) {
  1551. case : //第三个mbuf
  1552. process_packet(qconf, pkts_burst[j],
  1553. dst_port + j, portid);
  1554. GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
  1555. j++;
  1556. case ://第二个mbuf
  1557. process_packet(qconf, pkts_burst[j],
  1558. dst_port + j, portid);
  1559. GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
  1560. j++;
  1561. case ://第一个mbuf
  1562. process_packet(qconf, pkts_burst[j],
  1563. dst_port + j, portid);
  1564. GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
  1565. j++;
  1566. }
  1567.  
  1568. /*通过目的端口把数据包都发出去,这些数据包之前已经组合好了的
  1569. * Send packets out, through destination port.
  1570. * Consecuteve pacekts with the same destination port
  1571. * are already grouped together.
  1572. * If destination port for the packet equals BAD_PORT,
  1573. * then free the packet without sending it out.
  1574. */
  1575. for (j = ; j < nb_rx; j += k) { //遍历接收到的数据包
  1576.  
  1577. int32_t m;
  1578. uint16_t pn;
  1579.  
  1580. pn = dst_port[j];
  1581. k = pnum[j];
  1582.  
  1583. if (likely(pn != BAD_PORT)) {
  1584. send_packetsx4(qconf, pn, //把待发送的数据包放到发送缓冲区中,累积到32个再发出去
  1585. pkts_burst + j, k);
  1586. } else {
  1587. for (m = j; m != j + k; m++)
  1588. rte_pktmbuf_free(pkts_burst[m]);
  1589. }
  1590. }
  1591.  
  1592. #endif /* APP_LOOKUP_METHOD */
  1593. #else /*如果不支持Intel SSE4.1特性 ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */
  1594.  
  1595. /*预取接收队列上的第一个数据包 Prefetch first packets */
  1596. for (j = ; j < PREFETCH_OFFSET && j < nb_rx; j++) {
  1597. rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
  1598. }
  1599.  
  1600. /*预取和转发已经预取的数据包 Prefetch and forward already prefetched packets */
  1601. for (j = ; j < (nb_rx - PREFETCH_OFFSET); j++) {
  1602. rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
  1603. j + PREFETCH_OFFSET], void *));
  1604. l3fwd_simple_forward(pkts_burst[j], portid, qconf);//简单转发4倍数的数据包
  1605.  
  1606. }
  1607.  
  1608. /*转发正在预取的数据包 Forward remaining prefetched packets */
  1609. for (; j < nb_rx; j++) {
  1610. l3fwd_simple_forward(pkts_burst[j], portid, qconf);//简单转发剩余几个数据包
  1611.  
  1612. }
  1613. #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
  1614.  
  1615. } //for (i = 0; i < qconf->n_rx_queue; ++i)
  1616. } //while (1)
  1617. }//end of main_loop
  1618.  
  1619. static int //检查lcore的参数
  1620. check_lcore_params(void)
  1621. {
  1622. uint8_t queue, lcore;
  1623. uint16_t i;
  1624. int socketid;
  1625.  
  1626. for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
  1627. queue = lcore_params[i].queue_id;
  1628. if (queue >= MAX_RX_QUEUE_PER_PORT) { //如果队列编号大于128
  1629. printf("invalid queue number: %hhu\n", queue);
  1630. return -;
  1631. }
  1632. lcore = lcore_params[i].lcore_id;
  1633. if (!rte_lcore_is_enabled(lcore)) { //如果lcore没有启用
  1634. printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
  1635. return -;
  1636. }
  1637. if ((socketid = rte_lcore_to_socket_id(lcore) != ) &&
  1638. (numa_on == )) { //如果numa关闭
  1639. printf("warning: lcore %hhu is on socket %d with numa off \n",
  1640. lcore, socketid);
  1641. }
  1642. }
  1643. return ;
  1644. }
  1645.  
  1646. static int //检查物理端口的配置
  1647. check_port_config(const unsigned nb_ports)
  1648. {
  1649. unsigned portid;
  1650. uint16_t i;
  1651.  
  1652. for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
  1653. portid = lcore_params[i].port_id;
  1654. if ((enabled_port_mask & ( << portid)) == ) {
  1655. printf("port %u is not enabled in port mask\n", portid);
  1656. return -;
  1657. }
  1658. if (portid >= nb_ports) {
  1659. printf("port %u is not present on the board\n", portid);
  1660. return -;
  1661. }
  1662. }
  1663. return ;
  1664. }
  1665.  
  1666. static uint8_t //获取物理端口上的接收队列数量
  1667. get_port_n_rx_queues(const uint8_t port) //其实就是取queue_id最大值加1
  1668. {
  1669. int queue = -;
  1670. uint16_t i;
  1671.  
  1672. for (i = ; i < nb_lcore_params; ++i) { //遍历lcores的参数表
  1673. if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
  1674. queue = lcore_params[i].queue_id;//获取queue_id值
  1675. }
  1676. return (uint8_t)(++queue); //因为queue_id从0开始
  1677. }
  1678.  
  1679. static int //初始化lcore上的接收队列
  1680. init_lcore_rx_queues(void)
  1681. {
  1682. uint16_t i, nb_rx_queue;
  1683. uint8_t lcore;
  1684.  
  1685. for (i = ; i < nb_lcore_params; ++i) {//遍历lcores的参数表
  1686. lcore = lcore_params[i].lcore_id;
  1687. nb_rx_queue = lcore_conf[lcore].n_rx_queue;
  1688. if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {//如果接收队列总数大于128
  1689. printf("error: too many queues (%u) for lcore: %u\n",
  1690. (unsigned)nb_rx_queue + , (unsigned)lcore);
  1691. return -;
  1692. } else {
  1693. lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
  1694. lcore_params[i].port_id; //记录port_id
  1695. lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
  1696. lcore_params[i].queue_id; //记录queue_id
  1697. lcore_conf[lcore].n_rx_queue++;//lcore上接收队列的数量加1
  1698. }
  1699. }
  1700. return ;
  1701. }
  1702.  
  1703. /* display usage */
  1704. static void //打印使用说明
  1705. print_usage(const char *prgname)
  1706. {
  1707. printf ("%s [EAL options] -- -p PORTMASK -P"
  1708. " [--config (port,queue,lcore)[,(port,queue,lcore]]"
  1709. " [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
  1710. " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
  1711. " -P : enable promiscuous mode\n"
  1712. " --config (port,queue,lcore): rx queues configuration\n"
  1713. " --no-numa: optional, disable numa awareness\n"
  1714. " --ipv6: optional, specify it if running ipv6 packets\n"
  1715. " --enable-jumbo: enable jumbo frame"
  1716. " which max packet len is PKTLEN in decimal (64-9600)\n"
  1717. " --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n",
  1718. prgname);
  1719. }
  1720.  
  1721. static int //分析数据包的长度
  1722. parse_max_pkt_len(const char *pktlen)
  1723. {
  1724. char *end = NULL;
  1725. unsigned long len;
  1726.  
  1727. /* parse decimal string */
  1728. len = strtoul(pktlen, &end, ); //把字符串转换成十进制数字
  1729. if ((pktlen[] == '\0') || (end == NULL) || (*end != '\0'))
  1730. return -;
  1731.  
  1732. if (len == )
  1733. return -;
  1734.  
  1735. return len;
  1736. }
  1737.  
  1738. static int //分析物理端口的掩码
  1739. parse_portmask(const char *portmask)
  1740. {
  1741. char *end = NULL;
  1742. unsigned long pm;
  1743.  
  1744. /* parse hexadecimal string */
  1745. pm = strtoul(portmask, &end, );//字符串转换为十六进制的数字
  1746. if ((portmask[] == '\0') || (end == NULL) || (*end != '\0'))
  1747. return -;
  1748.  
  1749. if (pm == )
  1750. return -;
  1751.  
  1752. return pm;
  1753. }
  1754.  
  #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  /*
   * Parse the number of hash entries given as a hexadecimal string.
   *
   * hash_entry_num: NUL-terminated hexadecimal string.
   *
   * Returns the parsed count (always > 0) on success, or -1 when the
   * string is empty, contains a non-hex character, is zero, or does not
   * fit in the int return type.
   */
  static int
  parse_hash_entry_number(const char *hash_entry_num)
  {
      char *end = NULL;
      unsigned long hash_en;

      /* parse hexadecimal string */
      hash_en = strtoul(hash_entry_num, &end, 16);
      if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
          return -1;

      /* zero entries is meaningless; larger than INT32_MAX would wrap
       * when narrowed to int */
      if (hash_en == 0 || hash_en > INT32_MAX)
          return -1;

      return (int)hash_en;
  }
  #endif
  1772.  
  1773. static int //分析参数中的配置
  1774. parse_config(const char *q_arg)
  1775. {
  1776. char s[];
  1777. const char *p, *p0 = q_arg;
  1778. char *end;
  1779. enum fieldnames {
  1780. FLD_PORT = ,
  1781. FLD_QUEUE,
  1782. FLD_LCORE,
  1783. _NUM_FLD
  1784. };
  1785. unsigned long int_fld[_NUM_FLD];
  1786. char *str_fld[_NUM_FLD];
  1787. int i;
  1788. unsigned size;
  1789.  
  1790. nb_lcore_params = ; //数组的元素个数初始化为0
  1791. //举例: --config="(0,0,1),(0,1,2),(1,0,1),(1,1,3)"
  1792. while ((p = strchr(p0,'(')) != NULL) { //找到左括号的位置,并赋值给p,除非找不到左括号才结束while循环
  1793. ++p;
  1794. if((p0 = strchr(p,')')) == NULL) //找到有括号的位置,并赋值给p0
  1795. return -;
  1796.  
  1797. size = p0 - p; //计算括号内的字符串长度
  1798. if(size >= sizeof(s))
  1799. return -;
  1800.  
  1801. snprintf(s, sizeof(s), "%.*s", size, p); //按照size宽度拼接字符串s
  1802. if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)//分割字符串s到str_fld中
  1803. return -;
  1804. for (i = ; i < _NUM_FLD; i++){//遍历各个成员
  1805. errno = ;
  1806. int_fld[i] = strtoul(str_fld[i], &end, );//获取port_id、queue_id、lcore_id成员的值
  1807. if (errno != || end == str_fld[i] || int_fld[i] > )
  1808. return -;
  1809. }
  1810. if (nb_lcore_params >= MAX_LCORE_PARAMS) {
  1811. printf("exceeded max number of lcore params: %hu\n",
  1812. nb_lcore_params);
  1813. return -;
  1814. }
  1815. lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];//赋值port_id
  1816. lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];//赋值queue_id
  1817. lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];//赋值lcore_id
  1818. ++nb_lcore_params; //数组的元素个数自增
  1819. }
  1820. lcore_params = lcore_params_array;//使用新配置,抛弃默认配置
  1821. return ;
  1822. }
  1823.  
  1824. #define CMD_LINE_OPT_CONFIG "config"
  1825. #define CMD_LINE_OPT_NO_NUMA "no-numa"
  1826. #define CMD_LINE_OPT_IPV6 "ipv6"
  1827. #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
  1828. #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
  1829.  
  1830. /* Parse the argument given in the command line of the application */
  1831. static int //分析l3fwd相关的参数
  1832. parse_args(int argc, char **argv)
  1833. {
  1834. int opt, ret;
  1835. char **argvopt;
  1836. int option_index;
  1837. char *prgname = argv[];
  1838. static struct option lgopts[] = {
  1839. {CMD_LINE_OPT_CONFIG, , , }, //config参数对应于case 0
  1840. {CMD_LINE_OPT_NO_NUMA, , , },
  1841. {CMD_LINE_OPT_IPV6, , , },
  1842. {CMD_LINE_OPT_ENABLE_JUMBO, , , },
  1843. {CMD_LINE_OPT_HASH_ENTRY_NUM, , , },
  1844. {NULL, , , }//应该可以在这个地方加上kni_config命令字
  1845.  
  1846. };
  1847.  
  1848. argvopt = argv;
  1849.  
  1850. while ((opt = getopt_long(argc, argvopt, "p:P",
  1851. lgopts, &option_index)) != EOF) {
  1852.  
  1853. switch (opt) {
  1854. /* portmask 物理端口的掩码*/
  1855. case 'p':
  1856. enabled_port_mask = parse_portmask(optarg);//optarg为指向当前选项参数的指针
  1857. if (enabled_port_mask == ) {
  1858. printf("invalid portmask\n");
  1859. print_usage(prgname);
  1860. return -;
  1861. }
  1862. break;
  1863. case 'P': //混杂模式
  1864. printf("Promiscuous mode selected\n");
  1865. promiscuous_on = ;
  1866. break;
  1867.  
  1868. /* long options 解析长选项 */
  1869. case :
  1870. if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_CONFIG,
  1871. sizeof (CMD_LINE_OPT_CONFIG))) { //参数config
  1872. ret = parse_config(optarg);//解析()中的参数
  1873. if (ret) {
  1874. printf("invalid config\n");
  1875. print_usage(prgname);
  1876. return -;
  1877. }
  1878. }
  1879.  
  1880. if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
  1881. sizeof(CMD_LINE_OPT_NO_NUMA))) { //参数no-numa
  1882. printf("numa is disabled \n");
  1883. numa_on = ;
  1884. }
  1885.  
  1886. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  1887. if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
  1888. sizeof(CMD_LINE_OPT_IPV6))) { //参数ipv6
  1889. printf("ipv6 is specified \n");
  1890. ipv6 = ;
  1891. }
  1892. #endif
  1893.  
  1894. if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
  1895. sizeof (CMD_LINE_OPT_ENABLE_JUMBO))) {//参数enable-jumbo
  1896. struct option lenopts = {"max-pkt-len", required_argument, , };
  1897.  
  1898. printf("jumbo frame is enabled - disabling simple TX path\n");
  1899. port_conf.rxmode.jumbo_frame = ;
  1900.  
  1901. /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
  1902. if ( == getopt_long(argc, argvopt, "", &lenopts, &option_index)) {
  1903. ret = parse_max_pkt_len(optarg); //分析数据包的长度
  1904. if ((ret < ) || (ret > MAX_JUMBO_PKT_LEN)){
  1905. printf("invalid packet length\n");
  1906. print_usage(prgname);
  1907. return -;
  1908. }
  1909. port_conf.rxmode.max_rx_pkt_len = ret;
  1910. }
  1911. printf("set jumbo frame max packet length to %u\n",
  1912. (unsigned int)port_conf.rxmode.max_rx_pkt_len);
  1913. }
  1914. #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
  1915. if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
  1916. sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {//参数hash-entry-num
  1917. ret = parse_hash_entry_number(optarg);
  1918. if ((ret > ) && (ret <= L3FWD_HASH_ENTRIES)) {
  1919. hash_entry_number = ret;
  1920. } else {
  1921. printf("invalid hash entry number\n");
  1922. print_usage(prgname);
  1923. return -;
  1924. }
  1925. }
  1926. #endif
  1927. break;
  1928.  
  1929. default:
  1930. print_usage(prgname);
  1931. return -;
  1932. }
  1933. }
  1934.  
  1935. if (optind >= )
  1936. argv[optind-] = prgname;
  1937.  
  1938. ret = optind-;
  1939. optind = ; /* optind是下一个选项的索引 reset getopt lib */
  1940. return ret;
  1941. }
  1942.  
  1943. static void //打印mac地址
  1944. print_ethaddr(const char *name, const struct ether_addr *eth_addr)
  1945. {
  1946. char buf[ETHER_ADDR_FMT_SIZE];
  1947. ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
  1948. printf("%s%s", name, buf);
  1949. }
  1950.  
  1951. #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  1952. static void //创建LPM
  1953. setup_lpm(int socketid)
  1954. {
  1955. struct rte_lpm6_config config;
  1956. unsigned i;
  1957. int ret;
  1958. char s[];
  1959.  
  1960. /* 创建LPM ipv4表 create the LPM table */
  1961. snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
  1962. ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
  1963. IPV4_L3FWD_LPM_MAX_RULES, );
  1964. if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
  1965. rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
  1966. " on socket %d\n", socketid);
  1967.  
  1968. /* 填充ipv4 LPM表 populate the LPM table */
  1969.  
  1970. for (i = ; i < IPV4_L3FWD_NUM_ROUTES; i++) {//遍历已经配置的所有的规则
  1971.  
  1972. /* skip unused ports 跳过未使用的物理端口*/
  1973. if (( << ipv4_l3fwd_route_array[i].if_out &
  1974. enabled_port_mask) == )
  1975. continue;
  1976.  
  1977. //添加一条路由,即把规则转换为tbl24或者tbl8
  1978. ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
  1979. ipv4_l3fwd_route_array[i].ip,
  1980. ipv4_l3fwd_route_array[i].depth,
  1981. ipv4_l3fwd_route_array[i].if_out);
  1982.  
  1983. if (ret < ) { //如果添加路由失败
  1984. rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
  1985. "l3fwd LPM table on socket %d\n",
  1986. i, socketid);
  1987. }
  1988.  
  1989. printf("LPM: Adding route 0x%08x / %d (%d)\n",
  1990. (unsigned)ipv4_l3fwd_route_array[i].ip,
  1991. ipv4_l3fwd_route_array[i].depth,
  1992. ipv4_l3fwd_route_array[i].if_out);
  1993. }
  1994.  
  1995. /* 创建lpm ipv6表 create the LPM6 table */
  1996. snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
  1997.  
  1998. config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
  1999. config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
  2000. config.flags = ;
  2001. ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
  2002. &config);
  2003. if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
  2004. rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
  2005. " on socket %d\n", socketid);
  2006.  
  2007. /* 填充LPM ipv6表 populate the LPM table */
  2008. for (i = ; i < IPV6_L3FWD_NUM_ROUTES; i++) {
  2009.  
  2010. /* skip unused ports */
  2011. if (( << ipv6_l3fwd_route_array[i].if_out &
  2012. enabled_port_mask) == )
  2013. continue;
  2014.  
  2015. ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
  2016. ipv6_l3fwd_route_array[i].ip,
  2017. ipv6_l3fwd_route_array[i].depth,
  2018. ipv6_l3fwd_route_array[i].if_out);
  2019.  
  2020. if (ret < ) {
  2021. rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
  2022. "l3fwd LPM table on socket %d\n",
  2023. i, socketid);
  2024. }
  2025.  
  2026. printf("LPM: Adding route %s / %d (%d)\n",
  2027. "IPV6",
  2028. ipv6_l3fwd_route_array[i].depth,
  2029. ipv6_l3fwd_route_array[i].if_out);
  2030. }
  2031. }
  2032. #endif
  2033.  
  2034. static int //初始化内存
  2035. init_mem(unsigned nb_mbuf)
  2036. {
  2037. struct lcore_conf *qconf;
  2038. int socketid;
  2039. unsigned lcore_id;
  2040. char s[];
  2041.  
  2042. for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) {//遍历所有lcores
  2043. if (rte_lcore_is_enabled(lcore_id) == )
  2044. continue;
  2045.  
  2046. if (numa_on) //一般开启了numa
  2047. socketid = rte_lcore_to_socket_id(lcore_id);//得到lcore所在的socketid
  2048. else
  2049. socketid = ; //默认socketid为0
  2050.  
  2051. if (socketid >= NB_SOCKETS) {
  2052. rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
  2053. socketid, lcore_id, NB_SOCKETS);
  2054. }
  2055. if (pktmbuf_pool[socketid] == NULL) {
  2056. snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
  2057. pktmbuf_pool[socketid] = //为每一个socket创建mempool用来动态分配mbufs
  2058. rte_mempool_create(s, nb_mbuf, MBUF_SIZE, MEMPOOL_CACHE_SIZE,
  2059. sizeof(struct rte_pktmbuf_pool_private),
  2060. rte_pktmbuf_pool_init, NULL,
  2061. rte_pktmbuf_init, NULL,
  2062. socketid, );
  2063. if (pktmbuf_pool[socketid] == NULL)
  2064. rte_exit(EXIT_FAILURE,
  2065. "Cannot init mbuf pool on socket %d\n", socketid);
  2066. else
  2067. printf("Allocated mbuf pool on socket %d\n", socketid);
  2068.  
  2069. #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
  2070. setup_lpm(socketid); //创建LPM表,只需给每个socket cpu创建一个LPM表,而同一个CPU上的lcores共享LPM
  2071. #else
  2072. setup_hash(socketid); //创建Hash表
  2073. #endif
  2074. }
  2075. qconf = &lcore_conf[lcore_id];
  2076. qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
  2077. qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
  2078. }
  2079. return ;
  2080. }
  2081.  
  2082. /* Check the link status of all ports in up to 9s, and print them finally */
  2083. static void //检查物理端口的连接状态
  2084. check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
  2085. {
  2086. #define CHECK_INTERVAL 100 /* 100ms */
  2087. #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
  2088. uint8_t portid, count, all_ports_up, print_flag = ;
  2089. struct rte_eth_link link;
  2090.  
  2091. printf("\nChecking link status");
  2092. fflush(stdout);
  2093. for (count = ; count <= MAX_CHECK_TIME; count++) {//最多执行9000次
  2094. all_ports_up = ;
  2095. for (portid = ; portid < port_num; portid++) {//遍历物理端口
  2096. if ((port_mask & ( << portid)) == )
  2097. continue;
  2098. memset(&link, , sizeof(link));
  2099. rte_eth_link_get_nowait(portid, &link);
  2100. /* print link status if flag set */
  2101. if (print_flag == ) {
  2102. if (link.link_status)
  2103. printf("Port %d Link Up - speed %u "
  2104. "Mbps - %s\n", (uint8_t)portid,
  2105. (unsigned)link.link_speed,
  2106. (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
  2107. ("full-duplex") : ("half-duplex\n"));
  2108. else
  2109. printf("Port %d Link Down\n",
  2110. (uint8_t)portid);
  2111. continue;
  2112. }
  2113. /* clear all_ports_up flag if any link down */
  2114. if (link.link_status == ) {
  2115. all_ports_up = ;
  2116. break;
  2117. }
  2118. }
  2119. /* after finally printing all link status, get out */
  2120. if (print_flag == )
  2121. break;
  2122.  
  2123. if (all_ports_up == ) {
  2124. printf(".");
  2125. fflush(stdout);
  2126. rte_delay_ms(CHECK_INTERVAL);
  2127. }
  2128.  
  2129. /* set the print_flag if all ports up or timeout */
  2130. if (all_ports_up == || count == (MAX_CHECK_TIME - )) {
  2131. print_flag = ;
  2132. printf("done\n");
  2133. }
  2134. }
  2135. }
  2136.  
  2137. int //主函数
  2138. main(int argc, char **argv)
  2139. {
  2140. struct lcore_conf *qconf;
  2141. struct rte_eth_dev_info dev_info;
  2142. struct rte_eth_txconf *txconf;
  2143. int ret;
  2144. unsigned nb_ports;
  2145. uint16_t queueid;
  2146. unsigned lcore_id;
  2147. uint32_t n_tx_queue, nb_lcores;
  2148. uint8_t portid, nb_rx_queue, queue, socketid;
  2149.  
  2150. /* init EAL */
  2151. ret = rte_eal_init(argc, argv); //初始化软件抽象层,并解析EAL有关参数
  2152. if (ret < )
  2153. rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
  2154. argc -= ret; //减少参数个数
  2155. argv += ret; //移动参数位置
  2156.  
  2157. /* parse application arguments (after the EAL ones) */
  2158. ret = parse_args(argc, argv); //解析l3fwd有关参数: -p -P --config
  2159. if (ret < )
  2160. rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
  2161.  
  2162. if (check_lcore_params() < ) //检查lcore参数
  2163. rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
  2164.  
  2165. ret = init_lcore_rx_queues(); //初始化每个lcore上的rx queue数量
  2166. if (ret < )
  2167. rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
  2168.  
  2169. nb_ports = rte_eth_dev_count(); //获取物理端口的个数
  2170. if (nb_ports > RTE_MAX_ETHPORTS) //如果超过32个
  2171. nb_ports = RTE_MAX_ETHPORTS;
  2172.  
  2173. if (check_port_config(nb_ports) < ) //检查物理端口的配置
  2174. rte_exit(EXIT_FAILURE, "check_port_config failed\n");
  2175.  
  2176. nb_lcores = rte_lcore_count(); //获取启用的lcores的总个数
  2177.  
  2178. /* initialize all ports 初始化所有的物理端口 */
  2179. for (portid = ; portid < nb_ports; portid++) { //遍历所有的物理端口
  2180. /* skip ports that are not enabled 跳过没有启用的物理端口 */
  2181. if ((enabled_port_mask & ( << portid)) == ) {
  2182. printf("\nSkipping disabled port %d\n", portid);
  2183. continue;
  2184. }
  2185.  
  2186. /* init port 初始化物理端口*/
  2187. printf("Initializing port %d ... ", portid );
  2188. fflush(stdout); //清空标准输出(屏幕)的缓冲区,这样就能立即在屏幕上看到打印信息
  2189.  
  2190. nb_rx_queue = get_port_n_rx_queues(portid); //获取portid上的接收队列的个数
  2191. n_tx_queue = nb_lcores; //设定portid上的发送队列的个数为启用的lcores的个数
  2192. if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) //如果发送队列的数量超过16个
  2193. n_tx_queue = MAX_TX_QUEUE_PER_PORT;
  2194. printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
  2195. nb_rx_queue, (unsigned)n_tx_queue ); //这里是不是有点粗暴啊?????
  2196. ret = rte_eth_dev_configure(portid, nb_rx_queue, //第一步,配置网络设备
  2197. (uint16_t)n_tx_queue, &port_conf);
  2198. if (ret < ) //如果配置设备失败
  2199. rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
  2200. ret, portid);
  2201.  
  2202. rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); //记录mac地址到ports_eth_addr[portid]
  2203. print_ethaddr(" Address:", &ports_eth_addr[portid]);
  2204. printf(", ");
  2205.  
  2206. /* 为每一个物理端口准备着源mac地址和目的mac地址
  2207. * prepare dst and src MACs for each port.
  2208. */
  2209. *(uint64_t *)(val_eth + portid) =
  2210. ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << );
  2211. ether_addr_copy(&ports_eth_addr[portid], //前一个参数为from,后一个为to
  2212. (struct ether_addr *)(val_eth + portid) + );
  2213. /* init memory 分配内存并创建LPM或者hash */
  2214. ret = init_mem(NB_MBUF); //mempool包含8192个元素
  2215. if (ret < )
  2216. rte_exit(EXIT_FAILURE, "init_mem failed\n");
  2217.  
  2218. /*初始化一个发送队列成一对(lcore, port) init one TX queue per couple (lcore,port) */
  2219. queueid = ;
  2220. for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) { //遍历一个物理接口上的所有的lcores
  2221. if (rte_lcore_is_enabled(lcore_id) == ) //忽略未启用的lcore
  2222. continue;
  2223.  
  2224. if (numa_on)//如果启用numa
  2225. socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); //获取lcore_id所在的socketid
  2226. else
  2227. socketid = ;//默认socketid为0
  2228.  
  2229. printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
  2230. fflush(stdout);//清空标准输出(屏幕)的缓冲区
  2231.  
  2232. rte_eth_dev_info_get(portid, &dev_info);//获取设备信息
  2233. txconf = &dev_info.default_txconf;//得到发送的配置结构体指针
  2234. if (port_conf.rxmode.jumbo_frame)
  2235. txconf->txq_flags = ;
  2236. ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, //第二步,建立发送队列
  2237. socketid, txconf); //一个port上可能有多个queue,每个queue用一个lcore来绑定
  2238. if (ret < )
  2239. rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
  2240. "port=%d\n", ret, portid);
  2241.  
  2242. qconf = &lcore_conf[lcore_id]; //得到lcore_id的配置结构体指针
  2243. qconf->tx_queue_id[portid] = queueid; //记录发送队列的编号到lcore_conf中
  2244. queueid++; //发送队列的编号自增
  2245. } //end of for(lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
  2246. printf("\n");
  2247. } //end of for(portid = 0; portid < nb_ports; portid++)
  2248.  
  2249. for (lcore_id = ; lcore_id < RTE_MAX_LCORE; lcore_id++) { //遍历所有的lcores
  2250. if (rte_lcore_is_enabled(lcore_id) == )
  2251. continue; //忽略未启用的lcore
  2252. qconf = &lcore_conf[lcore_id];
  2253. printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
  2254. fflush(stdout);
  2255. /* init RX queues 初始化接收队列 */
  2256. for(queue = ; queue < qconf->n_rx_queue; ++queue) { //遍历所有的接收队列
  2257. portid = qconf->rx_queue_list[queue].port_id; //物理端口的编号
  2258. queueid = qconf->rx_queue_list[queue].queue_id;//接收队列的编号
  2259.  
  2260. if (numa_on)//一般启用numa
  2261. socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);//获取lcore_id所在的socketid
  2262. else
  2263. socketid = ;//默认socketid为0
  2264.  
  2265. printf("rxq=%d,%d,%d ", portid, queueid, socketid);
  2266. fflush(stdout);//清空标准输出(屏幕)的缓冲区
  2267.  
  2268. ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, //第三步,建立接收队列
  2269. socketid, //一个port上可能有多个queue,每个queue用一个lcore来绑定
  2270. NULL,
  2271. pktmbuf_pool[socketid]);
  2272. if (ret < )
  2273. rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d,"
  2274. "port=%d\n", ret, portid);
  2275. } //for(queue = 0; queue < qconf->n_rx_queue; ++queue)
  2276. }//for(lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
  2277.  
  2278. printf("\n");
  2279.  
  2280. /* start ports 启动物理端口 */
  2281. for (portid = ; portid < nb_ports; portid++) { //遍历所有的物理端口
  2282. if ((enabled_port_mask & ( << portid)) == ) {
  2283. continue; //忽略未启用的物理端口
  2284. }
  2285. /* Start device 启动设备 */
  2286. ret = rte_eth_dev_start(portid); //第四步,启动物理端口
  2287. if (ret < )
  2288. rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
  2289. ret, portid);
  2290.  
  2291. /*
  2292. * If enabled, put device in promiscuous mode.
  2293. * This allows IO forwarding mode to forward packets
  2294. * to itself through 2 cross-connected ports of the
  2295. * target machine.
  2296. */
  2297. if (promiscuous_on) //如果开始混杂模式
  2298. rte_eth_promiscuous_enable(portid); //启动混杂模式
  2299. }//end of for (portid = 0; portid < nb_ports; portid++)
  2300.  
  2301. check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
  2302.  
  2303. /* launch per-lcore init on every lcore 在每一个lcore上至多启动一个线程 */
  2304. rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);//CALL_MASTER表示在master也会启动线程
  2305. RTE_LCORE_FOREACH_SLAVE(lcore_id) { //遍历每个slave lcore
  2306. if (rte_eal_wait_lcore(lcore_id) < ) //等待线程结束
  2307. return -;
  2308. }
  2309.  
  2310. return ;
  2311. }

DPDK l3fwd的更多相关文章

  1. DPDK L3fwd 源码阅读

    代码部分 整个L3fwd有三千多行代码,但总体思想就是在L2fwd的基础上,增加网络层的根据 IP 地址进行路由查找的内容. main.c 文件 int main(int argc, char **a ...

  2. Linux平台上DPDK入门指南

    1. 简介 本文档包含DPDK软件安装和配置的相关说明.旨在帮助用户快速启动和运行软件.文档主要描述了在Linux环境下编译和 运行DPDK应用程序,但是文档并不深入DPDK的具体实现细节. 1.1. ...

  3. [dpdk] 熟悉SDK与初步使用 (四)(L3 Forwarding源码分析)

    接续前节:[dpdk] 熟悉SDK与初步使用 (三)(IP Fragmentation源码分析) 前文中的最后一个问题,搁置,并没有找到答案.所以继续阅读其他例子的代码,想必定能在其他位置看到答案. ...

  4. [dpdk] 读官方文档(3)

    续前节, 测试小程序 1. 想编译测试程序首先需要设置两个环境变量,为什么呢,因为测试程序的Makefile里用了... rpm装了打包好的devel包,这个rpm也会自带这两个环境变量.就是说写第三 ...

  5. [developmemt][dpdk] dpdk优化(转)

    转发:https://software.intel.com/en-us/articles/dpdk-performance-optimization-guidelines-white-paper 转发 ...

  6. [daily][dpdk] 网卡offload识别包类型;如何模拟环境构造一个vlan包

    第一部分 硬件识别包类型 网卡,是可以识别包类型的.在dpdk的API中.识别完之后,存在这个结构里: struct rte_mbuf { ...... union { uint32_t packet ...

  7. dpdk优化相关 转

    注:本文是参照了一些其他文章,原文地址点击这里. 首先根据这篇文章进行了性能瓶颈的分析 策略与方法 首先根据木桶原理,首先要找到最弱的地方,怎么找往上看↑. 想能优化需要考虑如下: 优化BIOS设置 ...

  8. DPDK应用示例指南简介(汇总)

    DPDK应用示例指南简介 <DPDK示例>系列文章主要是学习.记录.翻译DPDK官方示例文档.为了更好地理解和学习DPDK, 特通过对源码中的经典示例进行整理,供大家学习.交流和讨论. A ...

  9. Intel 推出 DPDK 开发包的意义是什么?

    Intel 推出 DPDK 开发包的意义是什么? http://www.zhihu.com/question/27413080?sort=created 基于intel dpdk的包处理器,相较于基于 ...

随机推荐

  1. sql 循环表中记录

    =========================================================================循环排序查询数据=================== ...

  2. Leetcode-Combinations Sum II

    Given a collection of candidate numbers (C) and a target number (T), find all unique combinations in ...

  3. Leetcode-Convert Sorted Array to BST

    Given an array where elements are sorted in ascending order, convert it to a height balanced BST. So ...

  4. 170228、Linux操作系统安装ELK stack日志管理系统--(1)Logstash和Filebeat的安装与使用

    安装测试环境:Ubuntu 16.04.2 LTS 前言 (1)ELK是Elasticsearch,Logstash,Kibana 开源软件的集合,对外是作为一个日志管理系统的开源方案.它可以从任何来 ...

  5. SQL架构信息读取

    --架构: select * from information_schema.SCHEMATA --表: select table_name from information_schema.table ...

  6. WebBrowser 控件-说明

    WebBrowser.Document 为活动的文档返回自动化对象,引用 Microsoft HTML Object Library 可查看详细属性和方法 下面的解说假设窗体中有一个名称为 Web1 ...

  7. Maven 整合SSH框架之pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/20 ...

  8. JS改变HTML元素的绝对坐标

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DT ...

  9. 【Linux】Ubuntu下录屏&&制作GIF

    在做Android的时候,想制作GIF用来演示效果.一番摸索.找到了一个简单可行的办法: App在模拟器中执行,用录屏软件录制.再将视频转成GIF. 系统: Ubuntu 15.04 录屏软件: Re ...

  10. 使用jQuery重用form表单并异步提交到其它action

    在做页面开发的时候,有时候要重用表单的数据,并异步请求提交到其它的链接中,这个时候就能够使用jquery去改动表单的action值(记得使用后改动回来).并调用submit方法,当然后台的链接acti ...