原文链接:http://ry0117.com/2016/12/24/OVS内核KEY值提取及匹配流表代码分析/

当开启OVS后,创建datapath类型为system的网桥并为其添加相关接口。OVS网桥内的接口在网卡接收到数据包后,数据包会先进入OVS的内核模块openvswitch,由内核模块从数据包上提取key值,并使用key值匹配OVS内核模块中的流表;当匹配到相应的流表后,则执行流表上相应的动作。

当在OVS内核缓存中匹配不到流表,则将key值信息通过NetLink发送给用户态的ovs-vswitchd守护进程,由其来决定如何处理数据包。

下面就Linux-3.19版本内核中OpenvSwitch内核模块中的提取Key值、匹配流表及执行流表动作相关的代码做一下分析。

提取KEY值(datapath/flow.c)

Key值信息是匹配流表的前提,key值中包括很多的信息,包括源MAC地址、目的MAC地址、VLAN信息、协议类型、源IP地址、目的IP地址、端口号等信息。这些key值大部分可以直接从skb数据包中提取到,其余的(如入端口号、隧道信息)则来自skb附带的元数据。

  /* Fills 'key' for 'skb': copies the tunnel metadata from 'tun_info' (zeroed
   * when 'tun_info' is NULL), then the skb priority, input vport number and
   * mark, clears ovs_flow_hash and recirc_id, and finally returns the result
   * of key_extract(skb, key). */
  1. int
  2. ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
  3. struct sk_buff *skb, struct sw_flow_key *key)
  4. {
  5. /* Extract metadata from packet. */
  6. if (tun_info) {
  7. memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
  8. if (tun_info->options) {
  9. BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
  10. 8)) - 1
  11. > sizeof(key->tun_opts));
  12. memcpy(GENEVE_OPTS(key, tun_info->options_len),
  13. tun_info->options, tun_info->options_len);
  14. key->tun_opts_len = tun_info->options_len;
  15. } else {
  16. key->tun_opts_len = 0;
  17. }
  18. } else {
  19. key->tun_opts_len = 0;
  20. memset(&key->tun_key, 0, sizeof(key->tun_key));
  21. }
  22. /* Fill the key's physical-layer fields from the skb. */
  23. key->phy.priority = skb->priority;
  24. /* key->phy.in_port is the port number of the vport the packet arrived on. */
  25. key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
  26. key->phy.skb_mark = skb->mark;
  27. key->ovs_flow_hash = 0;
  28. key->recirc_id = 0;
  29. /* Extract MAC addresses, protocol, IP addresses, ports, etc. from the skb into the key. */
  30. return key_extract(skb, key);
  31. }
  32. /**
  33. * key_extract - extracts a flow key from an Ethernet frame.
  34. * @skb: sk_buff that contains the frame, with skb->data pointing to the
  35. * Ethernet header
  36. * @key: output flow key
  37. *
  38. * The caller must ensure that skb->len >= ETH_HLEN.
  39. *
  40. * Returns 0 if successful, otherwise a negative errno value.
  41. *
  42. * Initializes @skb header pointers as follows:
  43. *
  44. * - skb->mac_header: the Ethernet header.
  45. *
  46. * - skb->network_header: just past the Ethernet header, or just past the
  47. * VLAN header, to the first byte of the Ethernet payload.
  48. *
  49. * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
  50. * on output, then just past the IP header, if one is present and
  51. * of a correct length, otherwise the same as skb->network_header.
  52. * For other key->eth.type values it is left untouched.
  53. */
  54. static int
  55. key_extract(struct sk_buff *skb, struct sw_flow_key *key)
  56. {
  57. int error;
  58. struct ethhdr *eth;
  59. /* Flags are always used as part of stats */
  60. key->tp.flags = 0;
  61. /* Reset the L2 (MAC) header pointer. */
  62. skb_reset_mac_header(skb);
  63. /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
  64. * header in the linear data area.
  65. */
  66. /* Get the Ethernet header and copy the source and destination MAC addresses into the key. */
  67. eth = eth_hdr(skb);
  68. ether_addr_copy(key->eth.src, eth->h_source);
  69. ether_addr_copy(key->eth.dst, eth->h_dest);
  70. /* Advance skb->data past the two MAC addresses:
  71. * if the frame carries VLAN information, skb->data now points at its start;
  72. * otherwise skb->data points at the EtherType field. */
  73. __skb_pull(skb, 2 * ETH_ALEN);
  74. /* We are going to push all headers that we pull, so no need to
  75. * update skb->csum here.
  76. */
  77. /* Extract VLAN information into the key. */
  78. key->eth.tci = 0;
  79. if (vlan_tx_tag_present(skb))
  80. key->eth.tci = htons(skb->vlan_tci);
  81. else if (eth->h_proto == htons(ETH_P_8021Q))
  82. if (unlikely(parse_vlan(skb, key)))
  83. return -ENOMEM;
  84. /* Extract the EtherType, e.g. ETH_P_IP, ETH_P_ARP, ETH_P_IPV6. */
  85. key->eth.type = parse_ethertype(skb);
  86. if (unlikely(key->eth.type == htons(0)))
  87. return -ENOMEM;
  88. /* Reset the L3 header pointer and mac_len at the current skb->data, then push back every header pulled above. */
  89. skb_reset_network_header(skb);
  90. skb_reset_mac_len(skb);
  91. __skb_push(skb, skb->data - skb_mac_header(skb));
  92. /* Network layer. */
  93. /* IPv4 packet. */
  94. if (key->eth.type == htons(ETH_P_IP)) {
  95. struct iphdr *nh;
  96. __be16 offset;
  97. /* Validate the IPv4 header; on success skb->transport_header is expected to point just past it (see the function header comment). */
  98. error = check_iphdr(skb);
  99. if (unlikely(error)) {
  100. /* Invalid IPv4 header: clear the L3 fields of the key. */
  101. memset(&key->ip, 0, sizeof(key->ip));
  102. memset(&key->ipv4, 0, sizeof(key->ipv4));
  103. if (error == -EINVAL) {
  104. /* -EINVAL is treated as non-fatal: transport_header is made equal to
  105. * network_header (as the function header comment describes) and 0 is returned with zeroed L3 fields. */
  106. skb->transport_header = skb->network_header;
  107. error = 0;
  108. }
  109. return error;
  110. }
  111. /* Get the IPv4 header and copy the source and destination addresses into the key. */
  112. nh = ip_hdr(skb);
  113. key->ipv4.addr.src = nh->saddr;
  114. key->ipv4.addr.dst = nh->daddr;
  115. /* Copy the L4 protocol number, TOS and TTL into the key. */
  116. key->ip.proto = nh->protocol;
  117. key->ip.tos = nh->tos;
  118. key->ip.ttl = nh->ttl;
  119. /*
  120. * Fragment classification: a non-zero fragment offset marks a later fragment and
  121. * returns without parsing L4; MF set or UDP GSO marks a first fragment; otherwise none.
  122. */
  123. offset = nh->frag_off & htons(IP_OFFSET);
  124. if (offset) {
  125. key->ip.frag = OVS_FRAG_TYPE_LATER;
  126. return 0;
  127. }
  128. if (nh->frag_off & htons(IP_MF) ||
  129. skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
  130. key->ip.frag = OVS_FRAG_TYPE_FIRST;
  131. else
  132. key->ip.frag = OVS_FRAG_TYPE_NONE;
  133. /* Transport layer. */
  134. /* TCP packet. */
  135. if (key->ip.proto == IPPROTO_TCP) {
  136. if (tcphdr_ok(skb)) {
  137. /* Get the TCP header and copy source port, destination port and flags into the key. */
  138. struct tcphdr *tcp = tcp_hdr(skb);
  139. key->tp.src = tcp->source;
  140. key->tp.dst = tcp->dest;
  141. key->tp.flags = TCP_FLAGS_BE16(tcp);
  142. } else {
  143. memset(&key->tp, 0, sizeof(key->tp));
  144. }
  145. /* UDP packet. */
  146. } else if (key->ip.proto == IPPROTO_UDP) {
  147. if (udphdr_ok(skb)) {
  148. /* Get the UDP header and copy source and destination ports into the key. */
  149. struct udphdr *udp = udp_hdr(skb);
  150. key->tp.src = udp->source;
  151. key->tp.dst = udp->dest;
  152. } else {
  153. memset(&key->tp, 0, sizeof(key->tp));
  154. }
  155. /* SCTP packet. */
  156. } else if (key->ip.proto == IPPROTO_SCTP) {
  157. if (sctphdr_ok(skb)) {
  158. /* Get the SCTP header and copy source and destination ports into the key. */
  159. struct sctphdr *sctp = sctp_hdr(skb);
  160. key->tp.src = sctp->source;
  161. key->tp.dst = sctp->dest;
  162. } else {
  163. memset(&key->tp, 0, sizeof(key->tp));
  164. }
  165. /* ICMP packet. */
  166. } else if (key->ip.proto == IPPROTO_ICMP) {
  167. if (icmphdr_ok(skb)) {
  168. /* Get the ICMP header and copy the type and code fields into the key. */
  169. struct icmphdr *icmp = icmp_hdr(skb);
  170. /* The ICMP type and code fields use the 16-bit
  171. * transport port fields, so we need to store
  172. * them in 16-bit network byte order. */
  173. key->tp.src = htons(icmp->type);
  174. key->tp.dst = htons(icmp->code);
  175. } else {
  176. memset(&key->tp, 0, sizeof(key->tp));
  177. }
  178. }
  179. /* ARP or RARP packet. */
  180. } else if (key->eth.type == htons(ETH_P_ARP) ||
  181. key->eth.type == htons(ETH_P_RARP)) {
  182. struct arp_eth_header *arp;
  183. bool arp_available = arphdr_ok(skb);
  184. /* Get the ARP header pointer. */
  185. arp = (struct arp_eth_header *)skb_network_header(skb);
  186. if (arp_available &&
  187. arp->ar_hrd == htons(ARPHRD_ETHER) &&
  188. arp->ar_pro == htons(ETH_P_IP) &&
  189. arp->ar_hln == ETH_ALEN &&
  190. arp->ar_pln == 4) {
  191. /* Store the ARP opcode in key->ip.proto. */
  192. /* We only match on the lower 8 bits of the opcode. */
  193. if (ntohs(arp->ar_op) <= 0xff)
  194. key->ip.proto = ntohs(arp->ar_op);
  195. else
  196. key->ip.proto = 0;
  197. /* Copy the sender/target IP addresses and sender/target hardware addresses into the key. */
  198. memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
  199. memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
  200. ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
  201. ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
  202. } else {
  203. memset(&key->ip, 0, sizeof(key->ip));
  204. memset(&key->ipv4, 0, sizeof(key->ipv4));
  205. }
  206. /* MPLS: copy the top label stack entry into the key and walk the label stack. */
  207. } else if (eth_p_mpls(key->eth.type)) {
  208. size_t stack_len = MPLS_HLEN;
  209. /* In the presence of an MPLS label stack the end of the L2
  210. * header and the beginning of the L3 header differ.
  211. *
  212. * Advance network_header to the beginning of the L3
  213. * header. mac_len corresponds to the end of the L2 header.
  214. */
  215. while (1) {
  216. __be32 lse;
  217. error = check_header(skb, skb->mac_len + stack_len);
  218. if (unlikely(error))
  219. return 0;
  220. memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
  221. if (stack_len == MPLS_HLEN)
  222. memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
  223. skb_set_network_header(skb, skb->mac_len + stack_len);
  224. if (lse & htonl(MPLS_LS_S_MASK))
  225. break;
  226. stack_len += MPLS_HLEN;
  227. }
  228. /* IPv6: extract the IPv6-related fields into the key. */
  229. } else if (key->eth.type == htons(ETH_P_IPV6)) {
  230. int nh_len; /* IPv6 Header + Extensions */
  231. nh_len = parse_ipv6hdr(skb, key);
  232. if (unlikely(nh_len < 0)) {
  233. memset(&key->ip, 0, sizeof(key->ip));
  234. memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
  235. if (nh_len == -EINVAL) {
  236. skb->transport_header = skb->network_header;
  237. error = 0;
  238. } else {
  239. error = nh_len;
  240. }
  241. return error;
  242. }
  243. if (key->ip.frag == OVS_FRAG_TYPE_LATER)
  244. return 0;
  245. if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
  246. key->ip.frag = OVS_FRAG_TYPE_FIRST;
  247. /* Transport layer. */
  248. if (key->ip.proto == NEXTHDR_TCP) {
  249. if (tcphdr_ok(skb)) {
  250. struct tcphdr *tcp = tcp_hdr(skb);
  251. key->tp.src = tcp->source;
  252. key->tp.dst = tcp->dest;
  253. key->tp.flags = TCP_FLAGS_BE16(tcp);
  254. } else {
  255. memset(&key->tp, 0, sizeof(key->tp));
  256. }
  257. } else if (key->ip.proto == NEXTHDR_UDP) {
  258. if (udphdr_ok(skb)) {
  259. struct udphdr *udp = udp_hdr(skb);
  260. key->tp.src = udp->source;
  261. key->tp.dst = udp->dest;
  262. } else {
  263. memset(&key->tp, 0, sizeof(key->tp));
  264. }
  265. } else if (key->ip.proto == NEXTHDR_SCTP) {
  266. if (sctphdr_ok(skb)) {
  267. struct sctphdr *sctp = sctp_hdr(skb);
  268. key->tp.src = sctp->source;
  269. key->tp.dst = sctp->dest;
  270. } else {
  271. memset(&key->tp, 0, sizeof(key->tp));
  272. }
  273. } else if (key->ip.proto == NEXTHDR_ICMP) {
  274. if (icmp6hdr_ok(skb)) {
  275. error = parse_icmpv6(skb, key, nh_len);
  276. if (error)
  277. return error;
  278. } else {
  279. memset(&key->tp, 0, sizeof(key->tp));
  280. }
  281. }
  282. }
  283. return 0;
  284. }

根据KEY值匹配流表(datapath/datapath.c)

通过ovs_flow_key_extract函数及key_extract函数从skb中提取所有需要的key值后,下一步就是使用key值来匹配OVS内核模块openvswitch中缓存的流表信息,并在匹配到流表后执行流表中相应的动作处理数据包。若在内核中未匹配到流表,则通过Netlink消息将数据包及key值发送到用户态ovs-vswitchd进程,由用户态进程来决定如何处理数据包。

  1. /* Looks up the flow matching 'key' and executes its actions, or issues an OVS_PACKET_CMD_MISS upcall when no flow matches; datapath hit/miss counters are updated either way. Must be called with rcu_read_lock. */
  2. void
  3. ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
  4. {
  5. const struct vport *p = OVS_CB(skb)->input_vport;
  6. struct datapath *dp = p->dp;
  7. struct sw_flow *flow;
  8. struct sw_flow_actions *sf_acts;
  9. struct dp_stats_percpu *stats;
  10. u64 *stats_counter;
  11. u32 n_mask_hit;
  12. /* Get this CPU's instance of dp->stats_percpu. */
  13. stats = this_cpu_ptr(dp->stats_percpu);
  14. /* Look up flow. */
  15. /* Search the datapath flow table for a flow matching the key. */
  16. flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
  17. if (unlikely(!flow)) {
  18. /*
  19. * No flow matched: pass the packet and key to ovs_dp_upcall() as an
  20. * OVS_PACKET_CMD_MISS upcall; per the article this reaches the userspace
  21. * ovs-vswitchd process over Netlink, which decides how to handle the packet.
  22. */
  23. struct dp_upcall_info upcall;
  24. int error;
  25. upcall.cmd = OVS_PACKET_CMD_MISS;
  26. upcall.userdata = NULL;
  27. upcall.portid = ovs_vport_find_upcall_portid(p, skb);
  28. upcall.egress_tun_info = NULL;
  29. /* Send the upcall; the skb is freed on error and consumed on success. */
  30. error = ovs_dp_upcall(dp, skb, key, &upcall);
  31. if (unlikely(error))
  32. kfree_skb(skb);
  33. else
  34. consume_skb(skb);
  35. stats_counter = &stats->n_missed;
  36. goto out;
  37. }
  38. /* A flow matched: update its statistics (article: packet and byte counts). */
  39. ovs_flow_stats_update(flow, key->tp.flags, skb);
  40. /* Fetch the action list of the matched flow. */
  41. sf_acts = rcu_dereference(flow->sf_acts);
  42. /* Execute the actions of the matched flow. */
  43. ovs_execute_actions(dp, skb, sf_acts, key);
  44. stats_counter = &stats->n_hit;
  45. out:
  46. /* Update datapath statistics. */
  47. u64_stats_update_begin(&stats->syncp);
  48. (*stats_counter)++;
  49. stats->n_mask_hit += n_mask_hit;
  50. u64_stats_update_end(&stats->syncp);
  51. }

执行流表ACTION(datapath/actions.c)

匹配到对应的流表后,从流表中获取流表的动作,循环遍历所有的flow action,执行相应的action动作。

  1. /* Execute a list of actions against 'skb'. Tracks the per-CPU nesting depth in exec_actions_level and returns the result of do_execute_actions(). */
  2. int
  3. ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
  4. const struct sw_flow_actions *acts,
  5. struct sw_flow_key *key)
  6. {
  7. int level = this_cpu_read(exec_actions_level);
  8. int err;
  9. this_cpu_inc(exec_actions_level);
  10. OVS_CB(skb)->egress_tun_info = NULL;
  11. /* Run the flow's actions. */
  12. err = do_execute_actions(dp, skb, key,
  13. acts->actions, acts->actions_len);
  14. /* Only the outermost invocation (level == 0) processes deferred actions; presumably these were queued by nested execution such as recirculation - TODO confirm. */
  15. if (!level)
  16. process_deferred_actions(dp);
  17. this_cpu_dec(exec_actions_level);
  18. return err;
  19. }
  20. /* Execute a list of actions against 'skb'. Walks the 'len' bytes of netlink attributes at 'attr'; frees 'skb' and returns the error as soon as an action fails, otherwise returns 0. */
  21. static int
  22. do_execute_actions(struct datapath *dp, struct sk_buff *skb,
  23. struct sw_flow_key *key,
  24. const struct nlattr *attr, int len)
  25. {
  26. /* Every output action needs a separate clone of 'skb', but the common
  27. * case is just a single output action, so that doing a clone and
  28. * then freeing the original skbuff is wasteful. So the following code
  29. * is slightly obscure just to avoid that.
  30. */
  31. int prev_port = -1;
  32. const struct nlattr *a;
  33. int rem;
  34. for (a = attr, rem = len; rem > 0;
  35. a = nla_next(a, &rem)) {
  36. int err = 0;
  37. if (unlikely(prev_port != -1)) {
  38. /* An output port is pending and more actions follow: clone the skb
  39. * and send the clone out of prev_port. */
  40. struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
  41. if (out_skb)
  42. do_output(dp, out_skb, prev_port);
  43. prev_port = -1;
  44. }
  45. switch (nla_type(a)) {
  46. /* Output: remember the port number; the actual send is deferred. */
  47. case OVS_ACTION_ATTR_OUTPUT:
  48. prev_port = nla_get_u32(a);
  49. break;
  50. /* Send the packet to the userspace process. */
  51. case OVS_ACTION_ATTR_USERSPACE:
  52. output_userspace(dp, skb, key, a);
  53. break;
  54. /* Hash action (article: assigns key->ovs_flow_hash). */
  55. case OVS_ACTION_ATTR_HASH:
  56. execute_hash(skb, key, a);
  57. break;
  58. /* MPLS push (not analysed in the article). */
  59. case OVS_ACTION_ATTR_PUSH_MPLS:
  60. err = push_mpls(skb, key, nla_data(a));
  61. break;
  62. /* MPLS pop (not analysed in the article). */
  63. case OVS_ACTION_ATTR_POP_MPLS:
  64. err = pop_mpls(skb, key, nla_get_be16(a));
  65. break;
  66. /* Push a VLAN tag. */
  67. case OVS_ACTION_ATTR_PUSH_VLAN:
  68. err = push_vlan(skb, key, nla_data(a));
  69. break;
  70. /* Pop the VLAN tag. */
  71. case OVS_ACTION_ATTR_POP_VLAN:
  72. err = pop_vlan(skb, key);
  73. break;
  74. /* Recirculation (article: queues the skb and key as a deferred action). */
  75. case OVS_ACTION_ATTR_RECIRC:
  76. err = execute_recirc(dp, skb, key, a, rem);
  77. if (nla_is_last(a, rem)) {
  78. /* If this is the last action, the skb has
  79. * been consumed or freed.
  80. * Return immediately.
  81. */
  82. return err;
  83. }
  84. break;
  85. /* Set: modify the packet according to the nested attribute. */
  86. case OVS_ACTION_ATTR_SET:
  87. err = execute_set_action(skb, key, nla_data(a));
  88. break;
  89. case OVS_ACTION_ATTR_SAMPLE:
  90. err = sample(dp, skb, key, a);
  91. break;
  92. }
  93. if (unlikely(err)) {
  94. kfree_skb(skb);
  95. return err;
  96. }
  97. }
  98. if (prev_port != -1)
  99. do_output(dp, skb, prev_port);
  100. else
  101. consume_skb(skb);
  102. return 0;
  103. }

OUTPUT ACTION(datapath/actions.c)

流表的OUTPUT动作指定了数据包发送的出接口信息,依次调用 do_output() → ovs_vport_send() → vport->ops->send(),将数据包从output action对应的接口发送出去。

  1. /* do_output: sends 'skb' on datapath port 'out_port', or frees it when no such vport exists. */
  2. static void
  3. do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
  4. {
  5. struct vport *vport = ovs_vport_rcu(dp, out_port);
  6. if (likely(vport))
  7. ovs_vport_send(vport, skb);
  8. else
  9. kfree_skb(skb);
  10. }
  11. /**
  12. * ovs_vport_send - send a packet on a device
  13. *
  14. * @vport: vport on which to send the packet
  15. * @skb: skb to send
  16. *
  17. * Sends the given packet and returns the length of data sent. Either ovs
  18. * lock or rcu_read_lock must be held.
  19. */
  20. int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
  21. {
  22. /* Send the packet through the vport->ops->send callback. */
  23. int sent = vport->ops->send(vport, skb);
  24. if (likely(sent > 0)) {
  25. struct pcpu_sw_netstats *stats;
  26. /* On success, add to this CPU's tx packet and byte counters in vport->percpu_stats. */
  27. stats = this_cpu_ptr(vport->percpu_stats);
  28. u64_stats_update_begin(&stats->syncp);
  29. stats->tx_packets++;
  30. stats->tx_bytes += sent;
  31. u64_stats_update_end(&stats->syncp);
  32. } else if (sent < 0) {
  33. ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
  34. } else {
  35. ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
  36. }
  37. return sent;
  38. }

当OVS接口类型为system时,vport->ops->send函数为netdev_send:

  1. /* Send callback used when an OVS output action transmits on a netdev vport. Returns the skb length handed to dev_queue_xmit(), or 0 when the packet is dropped. */
  2. static int
  3. netdev_send(struct vport *vport, struct sk_buff *skb)
  4. {
  5. struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
  6. int mtu = netdev_vport->dev->mtu;
  7. int len;
  8. /* Drop the packet when it is longer than the MTU and is not a GSO packet. */
  9. if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
  10. net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
  11. netdev_vport->dev->name,
  12. packet_length(skb), mtu);
  13. goto drop;
  14. }
  15. /* Point skb->dev at the output action's network device. */
  16. skb->dev = netdev_vport->dev;
  17. len = skb->len;
  18. /* Hand the packet to dev_queue_xmit(); its return value is not checked. */
  19. dev_queue_xmit(skb);
  20. return len;
  21. drop:
  22. kfree_skb(skb);
  23. return 0;
  24. }

SET ACTION(datapath/actions.c)

流表SET动作会修改数据包中指定的信息,如skb->priority、skb->mark等,并同步更新key中对应的字段。

  /* Applies one SET action, selected by the type of 'nested_attr'. Priority
   * and mark are written to both the skb and the key; tunnel info is stored
   * in OVS_CB(skb); every other type is delegated to its set_*() helper.
   * Returns 0, or the helper's error. Unknown types are ignored (err stays 0). */
  1. static int
  2. execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
  3. const struct nlattr *nested_attr)
  4. {
  5. int err = 0;
  6. switch (nla_type(nested_attr)) {
  7. case OVS_KEY_ATTR_PRIORITY:
  8. skb->priority = nla_get_u32(nested_attr);
  9. key->phy.priority = skb->priority;
  10. break;
  11. case OVS_KEY_ATTR_SKB_MARK:
  12. skb->mark = nla_get_u32(nested_attr);
  13. key->phy.skb_mark = skb->mark;
  14. break;
  15. case OVS_KEY_ATTR_TUNNEL_INFO:
  16. OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
  17. break;
  18. case OVS_KEY_ATTR_ETHERNET:
  19. err = set_eth_addr(skb, key, nla_data(nested_attr));
  20. break;
  21. case OVS_KEY_ATTR_IPV4:
  22. err = set_ipv4(skb, key, nla_data(nested_attr));
  23. break;
  24. case OVS_KEY_ATTR_IPV6:
  25. err = set_ipv6(skb, key, nla_data(nested_attr));
  26. break;
  27. case OVS_KEY_ATTR_TCP:
  28. err = set_tcp(skb, key, nla_data(nested_attr));
  29. break;
  30. case OVS_KEY_ATTR_UDP:
  31. err = set_udp(skb, key, nla_data(nested_attr));
  32. break;
  33. case OVS_KEY_ATTR_SCTP:
  34. err = set_sctp(skb, key, nla_data(nested_attr));
  35. break;
  36. case OVS_KEY_ATTR_MPLS:
  37. err = set_mpls(skb, key, nla_data(nested_attr));
  38. break;
  39. }
  40. return err;
  41. }

PUSH_VLAN ACTION(datapath/actions.c)

流表PUSH_VLAN动作会在数据包中添加对应的VLAN tag信息。

  /* Pushes the VLAN tag described by 'vlan' onto 'skb'. When the skb already
   * carries a VLAN tag in its metadata the flow key is invalidated; otherwise
   * key->eth.tci is set to the new TCI. Returns the result of skb_vlan_push(). */
  1. static int
  2. push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
  3. const struct ovs_action_push_vlan *vlan)
  4. {
  5. if (vlan_tx_tag_present(skb))
  6. invalidate_flow_key(key);
  7. else
  8. key->eth.tci = vlan->vlan_tci;
  9. return skb_vlan_push(skb, vlan->vlan_tpid,
  10. ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
  11. }
  /* Records (vlan_proto, vlan_tci) as the skb's VLAN tag in its metadata. If
   * a tag is already held there, that existing tag is first inserted into the
   * packet data with __vlan_insert_tag(), and for CHECKSUM_COMPLETE skbs the
   * inserted VLAN_HLEN bytes are added to skb->csum. Returns 0, or the error
   * from __vlan_insert_tag(). */
  12. int
  13. skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
  14. {
  15. if (vlan_tx_tag_present(skb)) {
  16. unsigned int offset = skb->data - skb_mac_header(skb);
  17. int err;
  18. /* __vlan_insert_tag expect skb->data pointing to mac header.
  19. * So change skb->data before calling it and change back to
  20. * original position later
  21. */
  22. __skb_push(skb, offset);
  23. err = __vlan_insert_tag(skb, skb->vlan_proto,
  24. vlan_tx_tag_get(skb));
  25. if (err)
  26. return err;
  27. skb->protocol = skb->vlan_proto;
  28. skb->mac_len += VLAN_HLEN;
  29. __skb_pull(skb, offset);
  30. if (skb->ip_summed == CHECKSUM_COMPLETE)
  31. skb->csum = csum_add(skb->csum, csum_partial(skb->data
  32. + (2 * ETH_ALEN), VLAN_HLEN, 0));
  33. }
  34. __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
  35. return 0;
  36. }
  37. static inline void
  38. __vlan_hwaccel_put_tag(struct sk_buff *skb,
  39. __be16 vlan_proto, u16 vlan_tci)
  40. {
  41. /* Store the VLAN protocol and TCI in the skb metadata, with VLAN_TAG_PRESENT set in the TCI. */
  42. skb->vlan_proto = vlan_proto;
  43. skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
  44. }

POP_VLAN ACTION(datapath/actions.c)

流表POP_VLAN动作移除数据包中的VLAN tag信息,并相应更新数据包的校验和。

  /* Pops one VLAN tag from 'skb' via skb_vlan_pop(). If the skb still holds a
   * VLAN tag in its metadata afterwards the flow key is invalidated; otherwise
   * key->eth.tci is cleared. Returns the result of skb_vlan_pop(). */
  1. static int
  2. pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
  3. {
  4. int err;
  5. err = skb_vlan_pop(skb);
  6. if (vlan_tx_tag_present(skb))
  7. invalidate_flow_key(key);
  8. else
  9. key->eth.tci = 0;
  10. return err;
  11. }
  /* Removes the outermost VLAN tag of 'skb': clears the tag held in the skb
   * metadata if present, otherwise strips an in-packet 802.1Q/802.1AD header.
   * If another VLAN header then remains in the packet, it is stripped too and
   * moved into the skb metadata. Returns 0, or the error from __skb_vlan_pop(). */
  12. int
  13. skb_vlan_pop(struct sk_buff *skb)
  14. {
  15. u16 vlan_tci;
  16. __be16 vlan_proto;
  17. int err;
  18. if (likely(vlan_tx_tag_present(skb))) {
  19. skb->vlan_tci = 0;
  20. } else {
  21. if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
  22. skb->protocol != htons(ETH_P_8021AD)) ||
  23. skb->len < VLAN_ETH_HLEN))
  24. return 0;
  25. err = __skb_vlan_pop(skb, &vlan_tci);
  26. if (err)
  27. return err;
  28. }
  29. /* move next vlan tag to hw accel tag */
  30. if (likely((skb->protocol != htons(ETH_P_8021Q) &&
  31. skb->protocol != htons(ETH_P_8021AD)) ||
  32. skb->len < VLAN_ETH_HLEN))
  33. return 0;
  34. vlan_proto = skb->protocol;
  35. err = __skb_vlan_pop(skb, &vlan_tci);
  36. if (unlikely(err))
  37. return err;
  38. __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
  39. return 0;
  40. }
  41. /* remove VLAN header from packet and update csum accordingly. The removed tag's TCI is returned in host byte order through 'vlan_tci'; returns 0 or the error from skb_ensure_writable(). */
  42. static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
  43. {
  44. struct vlan_hdr *vhdr;
  45. unsigned int offset = skb->data - skb_mac_header(skb);
  46. int err;
  47. __skb_push(skb, offset);
  48. err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
  49. if (unlikely(err))
  50. goto pull;
  51.  
  52. skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
  53. vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
  54. *vlan_tci = ntohs(vhdr->h_vlan_TCI);
  55. memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
  56. __skb_pull(skb, VLAN_HLEN);
  57. vlan_set_encap_proto(skb, vhdr);
  58. skb->mac_header += VLAN_HLEN;
  59. if (skb_network_offset(skb) < ETH_HLEN)
  60. skb_set_network_header(skb, ETH_HLEN);
  61. skb_reset_mac_len(skb);
  62. pull:
  63. __skb_pull(skb, offset);
  64. return err;
  65. }

OVS 内核KEY值提取及匹配流表代码分析的更多相关文章

  1. 《linux 内核全然剖析》sched.c sched.h 代码分析笔记

    版权声明:本文为博主原创文章.未经博主同意不得转载. https://blog.csdn.net/u011368821/article/details/25129835 sched.c sched.h ...

  2. ovs源码阅读--流表查询原理

    背景 在ovs交换机中,报文的处理流程可以划分为一下三个步骤:协议解析,表项查找和动作执行,其中最耗时的步骤在于表项查找,往往一个流表中有数目巨大的表项,如何根据数据报文的信息快速的查找到对应的流表项 ...

  3. OpenFlow协议中如何提高交换机流表的匹配成功率

    写在前面 这段时间一直在研究如何提高流表空间的利用率.一直没能想到好的idea.有一篇文献中比较了现有研究中提到的手段,在这里记录一下都有哪些类型的手段以及这些手段存在的不足.这些手段不仅局限于如何提 ...

  4. Thinkphp volist 多重循环原样输出数组key值的使用总结

    最近因为项目的缘故,要使用到volist.在这个过程中,遇到了一些小问题,主要就是volist在循环输出多重数据的时候,如何输出key.网上查阅了不少资料,很失望的是,大多资料就是粘贴复制Thinkp ...

  5. openvswitch 流表操作

    流表组成 每条流表规则由一些列字段组成,可以分为**基础字段.匹配字段和动作字段**三部分. 在打印流表时,在流表中还存在一些显示字段,如duration,idle_age等,此处把这些字段也暂时归之 ...

  6. 实验 6:OpenDaylight 实验——OpenDaylight 及 Postman 实现流表下发

    一.实验目的 熟悉 Postman 的使用;熟悉如何使用 OpenDaylight 通过 Postman 下发流表. 二.实验任务 流表有软超时和硬超时的概念,分别对应流表中的 idle_timeou ...

  7. 实验 6:OpenDaylight 实验——OpenDaylight 及 Postman 实现流表下发

    一.实验目的 熟悉 Postman 的使用:熟悉如何使用 OpenDaylight 通过 Postman 下发流表. 二.实验任务 流表有软超时和硬超时的概念,分别对应流表中的 idle_timeou ...

  8. 实验 6 :OpenDaylight 实验——OpenDaylight 及 Postman实现流表下发

    实验 6 :OpenDaylight 实验--OpenDaylight 及 Postman实现流表下发 一.实验目的 熟悉 Postman 的使用:熟悉如何使用 OpenDaylight 通过 Pos ...

  9. js提取对象的key值和value值

    在代码中,遇到需要单独提取对象的key值时 可使用 Object.keys(object)  object是你需要操作的对象 Object.keys()会返回一个存储对象中所有key值的数组 获取当前 ...

随机推荐

  1. LR参数化类型为file显示大于100数据方法

    在做测试的时候,某些数据需要大量参数化,可以用连接数据库方式,也可以使用file类型进行参数化,而loadrunner中file类型的参数化数据只显示100条,可以调整如下配置文件进行显示数据的修改: ...

  2. 论坛遇到附件上传失败问题总结(discuz)

    (1)bbs/source/class/class_upload.php 50行左右,注释$attach['target'] $attach['target'] = DISCUZ_ROOT.'./da ...

  3. dumpbin 查看dll中的导出函数

    C:\Program Files (x86)\Microsoft Visual Studio 14.0>dumpbin -exports E:\20171110\Release\aa.dll h ...

  4. android textview settext卡顿深层次原因

    最近在公司项目里面发现listview里面的textview在调用settext函数的时候非常耗时,当时都有点不敢相信,这是因为如果你把textview设置成wrap_content,则每次调用set ...

  5. DB2自增长ID

    建议类似的应用采用sequence对象,将来的应用维护和数据迁移会很方便.考虑的因素较少. 对于序列可以使用nextval和prevval来获得下一个和上一个值:CREATE SEQUENCE seq ...

  6. 测试这个才可以打包 我的PYQt matplotlib numpy 等程序

    from distutils.core import setup import py2exe import matplotlib import sys import FileDialog import ...

  7. 【Web】网站主如何更改网页标签的图标(favicon.ico)

    修改web项目的favicon图标,方式有两种:全局方式和局部方式 全局方式: 进入服务器\webapps\ROOT,然后用自己的favicon.ico替换服务器自带的favicon.ico图片 局部 ...

  8. MUI框架开发HTML5手机APP(一)--搭建第一个手机APP(转)

    出处:http://www.cnblogs.com/jerehedu/p/7832808.html  前  言 JRedu 随着HTML5的不断发展,移动开发成为主流趋势!越来越多的公司开始选择使用H ...

  9. Linux下编译与调试

    gcc/g++编译器 对于.c格式的C文件,可以采用gcc或g++编译 对于 .cc..cpp格式的C++文件,应该采用g++进行编译 常用的选项: -c  表示编译源文件 -o  表示输出目标文件 ...

  10. 解决Docker时区与主机时区不一致的问题

    在Dockerfile里面增加以下红色的部分 FROM hub.chinacloud.com/common/jdk:8MAINTAINER xxx@chinacloud.com.cn RUN mkdi ...