From: Ben Pfaff Date: Tue, 10 Aug 2010 18:35:46 +0000 (-0700) Subject: datapath: Fix handling of 802.1Q and SNAP headers. X-Git-Tag: v1.1.0pre1~108 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=50f06e1642e246bb9969035c9e3db7c44e7b95e0;p=sliver-openvswitch.git datapath: Fix handling of 802.1Q and SNAP headers. The kernel and user datapaths have code that assumes that 802.1Q headers are used only inside Ethernet II frames, not inside SNAP-encapsulated frames. But the kernel and user flow_extract() implementations would interpret 802.1Q headers inside SNAP headers as being valid VLANs. This would cause packet corruption if any VLAN-related actions were to be taken, so change the two flow_extract() implementations only to accept 802.1Q as an Ethernet II frame type, not as a SNAP-encoded frame type. 802.1Q-2005 says that this is correct anyhow: Where the ISS instance used to transmit and receive tagged frames is provided by a media access control method that can support Ethernet Type encoding directly (e.g., is an IEEE 802.3 or IEEE 802.11 MAC) or is media access method independent (e.g., 6.6), the TPID is Ethernet Type encoded, i.e., is two octets in length and comprises solely the assigned Ethernet Type value. Where the ISS instance is provided by a media access method that cannot directly support Ethernet Type encoding (e.g., is an IEEE 802.5 or FDDI MAC), the TPID is encoded according to the rule for a Subnetwork Access Protocol (Clause 10 of IEEE Std 802) that encapsulates Ethernet frames over LLC, and comprises the SNAP header (AA-AA-03) followed by the SNAP PID (00-00-00) followed by the two octets of the assigned Ethernet Type value. All of the media that OVS handles supports Ethernet Type fields, so to me that means that we don't have to handle 802.1Q-inside-SNAP. On the other hand, we *do* have to handle SNAP-inside-802.1Q, because this is actually allowed by the standards. So this commit also adds that support. I verified that, with this change, both SNAP and Ethernet packets are properly recognized both with and without 802.1Q encapsulation. I was a bit surprised to find out that Linux does not accept SNAP-encapsulated IP frames on Ethernet. Here's a summary of how frames are handled before and after this commit: Common cases ------------ Ethernet +------------+ 1. |dst|src|TYPE| +------------+ Ethernet LLC SNAP +------------+ +--------+ +-----------+ 2. |dst|src| len| |aa|aa|03| |000000|TYPE| +------------+ +--------+ +-----------+ Ethernet 802.1Q +------------+ +---------+ 3. |dst|src|8100| |VLAN|TYPE| +------------+ +---------+ Ethernet 802.1Q LLC SNAP +------------+ +---------+ +--------+ +-----------+ 4. |dst|src|8100| |VLAN| LEN| |aa|aa|03| |000000|TYPE| +------------+ +---------+ +--------+ +-----------+ Unusual cases ------------- Ethernet LLC SNAP 802.1Q +------------+ +--------+ +-----------+ +---------+ 5. |dst|src| len| |aa|aa|03| |000000|8100| |VLAN|TYPE| +------------+ +--------+ +-----------+ +---------+ Ethernet LLC +------------+ +--------+ 6. |dst|src| len| |xx|xx|xx| +------------+ +--------+ Ethernet LLC SNAP +------------+ +--------+ +-----------+ 7. |dst|src| len| |aa|aa|03| |xxxxxx|xxxx| +------------+ +--------+ +-----------+ Ethernet 802.1Q LLC +------------+ +---------+ +--------+ 8. |dst|src|8100| |VLAN| LEN| |xx|xx|xx| +------------+ +---------+ +--------+ Ethernet 802.1Q LLC SNAP +------------+ +---------+ +--------+ +-----------+ 9. |dst|src|8100| |VLAN| LEN| |aa|aa|03| |xxxxxx|xxxx| +------------+ +---------+ +--------+ +-----------+ Behavior -------- --------------- --------------- ------------------------------------- Before After this commit this commit dl_type dl_vlan dl_type dl_vlan Notes ------- ------- ------- ------- ------------------------------------- 1. TYPE ffff TYPE ffff no change 2. TYPE ffff TYPE ffff no change 3. TYPE VLAN TYPE VLAN no change 4. LEN VLAN TYPE VLAN proposal fixes behavior 5. TYPE VLAN 8100 ffff 802.1Q says this is invalid framing 6. 05ff ffff 05ff ffff no change 7. 05ff ffff 05ff ffff no change 8. LEN VLAN 05ff VLAN proposal fixes behavior 9. LEN VLAN 05ff VLAN proposal fixes behavior Signed-off-by: Ben Pfaff --- diff --git a/datapath/flow.c b/datapath/flow.c index 8b736c883..f769b14d6 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -171,23 +171,52 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts) call_rcu(&sf_acts->rcu, rcu_free_acts_callback); } -#define SNAP_OUI_LEN 3 - -struct eth_snap_hdr +static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key) { - struct ethhdr eth; - u8 dsap; /* Always 0xAA */ - u8 ssap; /* Always 0xAA */ - u8 ctrl; - u8 oui[SNAP_OUI_LEN]; - u16 ethertype; -} __attribute__ ((packed)); - -static int is_snap(const struct eth_snap_hdr *esh) + struct qtag_prefix { + __be16 eth_type; /* ETH_P_8021Q */ + __be16 tci; + }; + struct qtag_prefix *qp; + + if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)) + return; + + qp = (struct qtag_prefix *) skb->data; + key->dl_vlan = qp->tci & htons(VLAN_VID_MASK); + key->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; + __skb_pull(skb, sizeof(struct qtag_prefix)); +} + +static __be16 parse_ethertype(struct sk_buff *skb) { - return (esh->dsap == LLC_SAP_SNAP - && esh->ssap == LLC_SAP_SNAP - && !memcmp(esh->oui, "\0\0\0", 3)); + struct llc_snap_hdr { + u8 dsap; /* Always 0xAA */ + u8 ssap; /* Always 0xAA */ + u8 ctrl; + u8 oui[3]; + u16 ethertype; + }; + struct llc_snap_hdr *llc; + __be16 proto; + + proto = *(__be16 *) skb->data; + __skb_pull(skb, sizeof(__be16)); + + if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) + return proto; + + if (unlikely(skb->len < sizeof(struct llc_snap_hdr))) + return htons(ODP_DL_TYPE_NOT_ETH_TYPE); + + llc = (struct llc_snap_hdr *) skb->data; + if (llc->dsap != LLC_SAP_SNAP || + llc->ssap != LLC_SAP_SNAP || + (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) + return htons(ODP_DL_TYPE_NOT_ETH_TYPE); + + __skb_pull(skb, sizeof(struct llc_snap_hdr)); + return llc->ethertype; } /* Parses the Ethernet frame in 'skb', which was received on 'in_port', @@ -196,9 +225,7 @@ static int is_snap(const struct eth_snap_hdr *esh) int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) { struct ethhdr *eth; - struct eth_snap_hdr *esh; int retval = 0; - int nh_ofs; memset(key, 0, sizeof *key); key->tun_id = OVS_CB(skb)->tun_id; @@ -207,43 +234,28 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) if (skb->len < sizeof *eth) return 0; - if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) { + if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) return 0; - } skb_reset_mac_header(skb); - eth = eth_hdr(skb); - esh = (struct eth_snap_hdr *) eth; - nh_ofs = sizeof *eth; - if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF)) - key->dl_type = eth->h_proto; - else if (skb->len >= sizeof *esh && is_snap(esh)) { - key->dl_type = esh->ethertype; - nh_ofs = sizeof *esh; - } else { - key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE); - if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) { - nh_ofs += sizeof(struct llc_pdu_un); - } - } - /* Check for a VLAN tag */ - if (key->dl_type == htons(ETH_P_8021Q) && - skb->len >= nh_ofs + sizeof(struct vlan_hdr)) { - struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs); - key->dl_type = vh->h_vlan_encapsulated_proto; - key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK); - key->dl_vlan_pcp = (ntohs(vh->h_vlan_TCI) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; - nh_ofs += sizeof(struct vlan_hdr); - } + /* Link layer. */ + eth = eth_hdr(skb); memcpy(key->dl_src, eth->h_source, ETH_ALEN); memcpy(key->dl_dst, eth->h_dest, ETH_ALEN); - skb_set_network_header(skb, nh_ofs); + + /* dl_type, dl_vlan, dl_vlan_pcp. */ + __skb_pull(skb, 2 * ETH_ALEN); + if (eth->h_proto == htons(ETH_P_8021Q)) + parse_vlan(skb, key); + key->dl_type = parse_ethertype(skb); + skb_reset_network_header(skb); + __skb_push(skb, skb->data - (unsigned char *)eth); /* Network layer. */ if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { struct iphdr *nh = ip_hdr(skb); - int th_ofs = nh_ofs + nh->ihl * 4; + int th_ofs = skb_network_offset(skb) + nh->ihl * 4; key->nw_src = nh->saddr; key->nw_dst = nh->daddr; key->nw_tos = nh->tos & ~INET_ECN_MASK; diff --git a/lib/flow.c b/lib/flow.c index 72ee14f2a..d545661ab 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -77,16 +77,47 @@ pull_icmp(struct ofpbuf *packet) return ofpbuf_try_pull(packet, ICMP_HEADER_LEN); } -static struct eth_header * -pull_eth(struct ofpbuf *packet) +static void +parse_vlan(struct ofpbuf *b, flow_t *flow) { - return ofpbuf_try_pull(packet, ETH_HEADER_LEN); + struct qtag_prefix { + uint16_t eth_type; /* ETH_TYPE_VLAN */ + uint16_t tci; + }; + + if (b->size >= sizeof(struct qtag_prefix) + sizeof(uint16_t)) { + struct qtag_prefix *qp = ofpbuf_pull(b, sizeof *qp); + flow->dl_vlan = qp->tci & htons(VLAN_VID_MASK); + flow->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; + } } -static struct vlan_header * -pull_vlan(struct ofpbuf *packet) +static uint16_t +parse_ethertype(struct ofpbuf *b) { - return ofpbuf_try_pull(packet, VLAN_HEADER_LEN); + struct llc_snap_header *llc; + uint16_t proto; + + proto = *(uint16_t *) ofpbuf_pull(b, sizeof proto); + if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) { + return proto; + } + + if (b->size < sizeof *llc) { + return htons(ODP_DL_TYPE_NOT_ETH_TYPE); + } + + llc = b->data; + if (llc->llc.llc_dsap != LLC_DSAP_SNAP + || llc->llc.llc_ssap != LLC_SSAP_SNAP + || llc->llc.llc_cntl != LLC_CNTL_SNAP + || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET, + sizeof llc->snap.snap_org)) { + return htons(ODP_DL_TYPE_NOT_ETH_TYPE); + } + + ofpbuf_pull(b, sizeof *llc); + return llc->snap.snap_type; } /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. @@ -112,109 +143,86 @@ flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port, packet->l4 = NULL; packet->l7 = NULL; - eth = pull_eth(&b); - if (eth) { - if (ntohs(eth->eth_type) >= OFP_DL_TYPE_ETH2_CUTOFF) { - /* This is an Ethernet II frame */ - flow->dl_type = eth->eth_type; - } else { - /* This is an 802.2 frame */ - struct llc_header *llc = ofpbuf_at(&b, 0, sizeof *llc); - struct snap_header *snap = ofpbuf_at(&b, sizeof *llc, - sizeof *snap); - if (llc == NULL) { - return 0; - } - if (snap - && llc->llc_dsap == LLC_DSAP_SNAP - && llc->llc_ssap == LLC_SSAP_SNAP - && llc->llc_cntl == LLC_CNTL_SNAP - && !memcmp(snap->snap_org, SNAP_ORG_ETHERNET, - sizeof snap->snap_org)) { - flow->dl_type = snap->snap_type; - ofpbuf_pull(&b, LLC_SNAP_HEADER_LEN); - } else { - flow->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE); - ofpbuf_pull(&b, sizeof(struct llc_header)); - } - } + if (b.size < sizeof *eth) { + return 0; + } - /* Check for a VLAN tag */ - if (flow->dl_type == htons(ETH_TYPE_VLAN)) { - struct vlan_header *vh = pull_vlan(&b); - if (vh) { - flow->dl_type = vh->vlan_next_type; - flow->dl_vlan = vh->vlan_tci & htons(VLAN_VID_MASK); - flow->dl_vlan_pcp = (ntohs(vh->vlan_tci) & 0xe000) >> 13; - } - } - memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); - memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); - - packet->l3 = b.data; - if (flow->dl_type == htons(ETH_TYPE_IP)) { - const struct ip_header *nh = pull_ip(&b); - if (nh) { - flow->nw_src = get_unaligned_u32(&nh->ip_src); - flow->nw_dst = get_unaligned_u32(&nh->ip_dst); - flow->nw_tos = nh->ip_tos & IP_DSCP_MASK; - flow->nw_proto = nh->ip_proto; - packet->l4 = b.data; - if (!IP_IS_FRAGMENT(nh->ip_frag_off)) { - if (flow->nw_proto == IP_TYPE_TCP) { - const struct tcp_header *tcp = pull_tcp(&b); - if (tcp) { - flow->tp_src = tcp->tcp_src; - flow->tp_dst = tcp->tcp_dst; - packet->l7 = b.data; - } else { - /* Avoid tricking other code into thinking that - * this packet has an L4 header. */ - flow->nw_proto = 0; - } - } else if (flow->nw_proto == IP_TYPE_UDP) { - const struct udp_header *udp = pull_udp(&b); - if (udp) { - flow->tp_src = udp->udp_src; - flow->tp_dst = udp->udp_dst; - packet->l7 = b.data; - } else { - /* Avoid tricking other code into thinking that - * this packet has an L4 header. */ - flow->nw_proto = 0; - } - } else if (flow->nw_proto == IP_TYPE_ICMP) { - const struct icmp_header *icmp = pull_icmp(&b); - if (icmp) { - flow->icmp_type = htons(icmp->icmp_type); - flow->icmp_code = htons(icmp->icmp_code); - packet->l7 = b.data; - } else { - /* Avoid tricking other code into thinking that - * this packet has an L4 header. */ - flow->nw_proto = 0; - } + /* Link layer. */ + eth = b.data; + memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); + memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); + + /* dl_type, dl_vlan, dl_vlan_pcp. */ + ofpbuf_pull(&b, ETH_ADDR_LEN * 2); + if (eth->eth_type == htons(ETH_TYPE_VLAN)) { + parse_vlan(&b, flow); + } + flow->dl_type = parse_ethertype(&b); + + /* Network layer. */ + packet->l3 = b.data; + if (flow->dl_type == htons(ETH_TYPE_IP)) { + const struct ip_header *nh = pull_ip(&b); + if (nh) { + flow->nw_src = get_unaligned_u32(&nh->ip_src); + flow->nw_dst = get_unaligned_u32(&nh->ip_dst); + flow->nw_tos = nh->ip_tos & IP_DSCP_MASK; + flow->nw_proto = nh->ip_proto; + packet->l4 = b.data; + if (!IP_IS_FRAGMENT(nh->ip_frag_off)) { + if (flow->nw_proto == IP_TYPE_TCP) { + const struct tcp_header *tcp = pull_tcp(&b); + if (tcp) { + flow->tp_src = tcp->tcp_src; + flow->tp_dst = tcp->tcp_dst; + packet->l7 = b.data; + } else { + /* Avoid tricking other code into thinking that + * this packet has an L4 header. */ + flow->nw_proto = 0; + } + } else if (flow->nw_proto == IP_TYPE_UDP) { + const struct udp_header *udp = pull_udp(&b); + if (udp) { + flow->tp_src = udp->udp_src; + flow->tp_dst = udp->udp_dst; + packet->l7 = b.data; + } else { + /* Avoid tricking other code into thinking that + * this packet has an L4 header. */ + flow->nw_proto = 0; + } + } else if (flow->nw_proto == IP_TYPE_ICMP) { + const struct icmp_header *icmp = pull_icmp(&b); + if (icmp) { + flow->icmp_type = htons(icmp->icmp_type); + flow->icmp_code = htons(icmp->icmp_code); + packet->l7 = b.data; + } else { + /* Avoid tricking other code into thinking that + * this packet has an L4 header. */ + flow->nw_proto = 0; } - } else { - retval = 1; } + } else { + retval = 1; + } + } + } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { + const struct arp_eth_header *arp = pull_arp(&b); + if (arp && arp->ar_hrd == htons(1) + && arp->ar_pro == htons(ETH_TYPE_IP) + && arp->ar_hln == ETH_ADDR_LEN + && arp->ar_pln == 4) { + /* We only match on the lower 8 bits of the opcode. */ + if (ntohs(arp->ar_op) <= 0xff) { + flow->nw_proto = ntohs(arp->ar_op); } - } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { - const struct arp_eth_header *arp = pull_arp(&b); - if (arp && arp->ar_hrd == htons(1) - && arp->ar_pro == htons(ETH_TYPE_IP) - && arp->ar_hln == ETH_ADDR_LEN - && arp->ar_pln == 4) { - /* We only match on the lower 8 bits of the opcode. */ - if (ntohs(arp->ar_op) <= 0xff) { - flow->nw_proto = ntohs(arp->ar_op); - } - if ((flow->nw_proto == ARP_OP_REQUEST) - || (flow->nw_proto == ARP_OP_REPLY)) { - flow->nw_src = arp->ar_spa; - flow->nw_dst = arp->ar_tpa; - } + if ((flow->nw_proto == ARP_OP_REQUEST) + || (flow->nw_proto == ARP_OP_REPLY)) { + flow->nw_src = arp->ar_spa; + flow->nw_dst = arp->ar_tpa; } } } diff --git a/tests/flowgen.pl b/tests/flowgen.pl index bbb6f11d4..a14faa709 100755 --- a/tests/flowgen.pl +++ b/tests/flowgen.pl @@ -111,6 +111,10 @@ sub output { my $packet = ''; $packet .= pack_ethaddr($flow{DL_DST}); $packet .= pack_ethaddr($flow{DL_SRC}); + if ($flow{DL_VLAN} != 0xffff) { + $packet .= pack('nn', 0x8100, $flow{DL_VLAN}); + } + my $len_ofs = length($packet); $packet .= pack('n', 0) if $attrs{DL_HEADER} =~ /^802.2/; if ($attrs{DL_HEADER} eq '802.2') { $packet .= pack('CCC', 0x42, 0x42, 0x03); # LLC for 802.1D STP. @@ -119,9 +123,6 @@ sub output { $packet .= pack('CCC', 0xaa, 0xaa, 0x03); # LLC for SNAP. $packet .= pack('CCC', 0, 0, 0); # SNAP OUI. } - if ($attrs{DL_VLAN} ne 'none') { - $packet .= pack('nn', 0x8100, $flow{DL_VLAN}); - } $packet .= pack('n', $flow{DL_TYPE}); if ($attrs{DL_TYPE} eq 'ip') { my $ip = pack('CCnnnCCnNN', @@ -193,8 +194,11 @@ sub output { $packet .= $ip; } } - substr($packet, 12, 2) = pack('n', length($packet)) - if $attrs{DL_HEADER} =~ /^802.2/; + if ($attrs{DL_HEADER} =~ /^802.2/) { + my $len = length ($packet); + $len -= 4 if $flow{DL_VLAN} != 0xffff; + substr($packet, $len_ofs, 2) = pack('n', $len); + } print join(' ', map("$_=$attrs{$_}", keys(%attrs))), "\n"; print join(' ', map("$_=$flow{$_}", keys(%flow))), "\n"; diff --git a/tests/test-flows.c b/tests/test-flows.c index be401edf9..3d31aae16 100644 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@ -75,6 +75,7 @@ main(int argc OVS_UNUSED, char *argv[]) printf("mismatch on packet #%d (1-based).\n", n); printf("Packet:\n"); ofp_print_packet(stdout, packet->data, packet->size, packet->size); + ovs_hex_dump(stdout, packet->data, packet->size, 0, true); printf("Expected flow:\n%s\n", exp_s); printf("Actually extracted flow:\n%s\n", got_s); printf("\n");