datapath: Fix handling of 802.1Q and SNAP headers.
authorBen Pfaff <blp@nicira.com>
Tue, 10 Aug 2010 18:35:46 +0000 (11:35 -0700)
committerBen Pfaff <blp@nicira.com>
Tue, 10 Aug 2010 18:35:46 +0000 (11:35 -0700)
The kernel and user datapaths have code that assumes that 802.1Q headers
are used only inside Ethernet II frames, not inside SNAP-encapsulated
frames.  But the kernel and user flow_extract() implementations would
interpret 802.1Q headers inside SNAP headers as being valid VLANs.  This
would cause packet corruption if any VLAN-related actions were to be taken,
so change the two flow_extract() implementations only to accept 802.1Q as
an Ethernet II frame type, not as a SNAP-encoded frame type.

802.1Q-2005 says that this is correct anyhow:

    Where the ISS instance used to transmit and receive tagged frames is
    provided by a media access control method that can support Ethernet
    Type encoding directly (e.g., is an IEEE 802.3 or IEEE 802.11 MAC) or
    is media access method independent (e.g., 6.6), the TPID is Ethernet
    Type encoded, i.e., is two octets in length and comprises solely the
    assigned Ethernet Type value.

    Where the ISS instance is provided by a media access method that
    cannot directly support Ethernet Type encoding (e.g., is an IEEE
    802.5 or FDDI MAC), the TPID is encoded according to the rule for
    a Subnetwork Access Protocol (Clause 10 of IEEE Std 802) that
    encapsulates Ethernet frames over LLC, and comprises the SNAP
    header (AA-AA-03) followed by the SNAP PID (00-00-00) followed by
    the two octets of the assigned Ethernet Type value.

All of the media that OVS handles supports Ethernet Type fields, so to me
that means that we don't have to handle 802.1Q-inside-SNAP.

On the other hand, we *do* have to handle SNAP-inside-802.1Q, because this
is actually allowed by the standards.  So this commit also adds that
support.

I verified that, with this change, both SNAP and Ethernet packets are
properly recognized both with and without 802.1Q encapsulation.

I was a bit surprised to find out that Linux does not accept
SNAP-encapsulated IP frames on Ethernet.

Here's a summary of how frames are handled before and after this commit:

Common cases
------------

       Ethernet
    +------------+
1.  |dst|src|TYPE|
    +------------+

       Ethernet       LLC         SNAP
    +------------+ +--------+ +-----------+
2.  |dst|src| len| |aa|aa|03| |000000|TYPE|
    +------------+ +--------+ +-----------+

       Ethernet       802.1Q
    +------------+ +---------+
3.  |dst|src|8100| |VLAN|TYPE|
    +------------+ +---------+

       Ethernet       802.1Q      LLC         SNAP
    +------------+ +---------+ +--------+ +-----------+
4.  |dst|src|8100| |VLAN| LEN| |aa|aa|03| |000000|TYPE|
    +------------+ +---------+ +--------+ +-----------+

Unusual cases
-------------

       Ethernet       LLC         SNAP         802.1Q
    +------------+ +--------+ +-----------+ +---------+
5.  |dst|src| len| |aa|aa|03| |000000|8100| |VLAN|TYPE|
    +------------+ +--------+ +-----------+ +---------+

       Ethernet       LLC
    +------------+ +--------+
6.  |dst|src| len| |xx|xx|xx|
    +------------+ +--------+

       Ethernet       LLC         SNAP
    +------------+ +--------+ +-----------+
7.  |dst|src| len| |aa|aa|03| |xxxxxx|xxxx|
    +------------+ +--------+ +-----------+

       Ethernet       802.1Q      LLC
    +------------+ +---------+ +--------+
8.  |dst|src|8100| |VLAN| LEN| |xx|xx|xx|
    +------------+ +---------+ +--------+

       Ethernet       802.1Q      LLC         SNAP
    +------------+ +---------+ +--------+ +-----------+
9.  |dst|src|8100| |VLAN| LEN| |aa|aa|03| |xxxxxx|xxxx|
    +------------+ +---------+ +--------+ +-----------+

Behavior
--------

   ---------------  ---------------  -------------------------------------
       Before           After
     this commit      this commit
   dl_type dl_vlan  dl_type dl_vlan  Notes
   ------- -------  ------- -------  -------------------------------------
1.   TYPE    ffff     TYPE    ffff   no change
2.   TYPE    ffff     TYPE    ffff   no change
3.   TYPE    VLAN     TYPE    VLAN   no change
4.    LEN    VLAN     TYPE    VLAN   proposal fixes behavior
5.   TYPE    VLAN     8100    ffff   802.1Q says this is invalid framing
6.   05ff    ffff     05ff    ffff   no change
7.   05ff    ffff     05ff    ffff   no change
8.    LEN    VLAN     05ff    VLAN   proposal fixes behavior
9.    LEN    VLAN     05ff    VLAN   proposal fixes behavior

Signed-off-by: Ben Pfaff <blp@nicira.com>
datapath/flow.c
lib/flow.c
tests/flowgen.pl
tests/test-flows.c

index 8b736c8..f769b14 100644 (file)
@@ -171,23 +171,52 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
        call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
 }
 
-#define SNAP_OUI_LEN 3
-
-struct eth_snap_hdr
+static void parse_vlan(struct sk_buff *skb, struct odp_flow_key *key)
 {
-       struct ethhdr eth;
-       u8  dsap;  /* Always 0xAA */
-       u8  ssap;  /* Always 0xAA */
-       u8  ctrl;
-       u8  oui[SNAP_OUI_LEN];
-       u16 ethertype;
-} __attribute__ ((packed));
-
-static int is_snap(const struct eth_snap_hdr *esh)
+       struct qtag_prefix {
+               __be16 eth_type; /* ETH_P_8021Q */
+               __be16 tci;
+       };
+       struct qtag_prefix *qp;
+
+       if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))
+               return;
+
+       qp = (struct qtag_prefix *) skb->data;
+       key->dl_vlan = qp->tci & htons(VLAN_VID_MASK);
+       key->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT;
+       __skb_pull(skb, sizeof(struct qtag_prefix));
+}
+
+static __be16 parse_ethertype(struct sk_buff *skb)
 {
-       return (esh->dsap == LLC_SAP_SNAP
-               && esh->ssap == LLC_SAP_SNAP
-               && !memcmp(esh->oui, "\0\0\0", 3));
+       struct llc_snap_hdr {
+               u8  dsap;  /* Always 0xAA */
+               u8  ssap;  /* Always 0xAA */
+               u8  ctrl;
+               u8  oui[3];
+               u16 ethertype;
+       };
+       struct llc_snap_hdr *llc;
+       __be16 proto;
+
+       proto = *(__be16 *) skb->data;
+       __skb_pull(skb, sizeof(__be16));
+
+       if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF)
+               return proto;
+
+       if (unlikely(skb->len < sizeof(struct llc_snap_hdr)))
+               return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+
+       llc = (struct llc_snap_hdr *) skb->data;
+       if (llc->dsap != LLC_SAP_SNAP ||
+           llc->ssap != LLC_SAP_SNAP ||
+           (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
+               return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+
+       __skb_pull(skb, sizeof(struct llc_snap_hdr));
+       return llc->ethertype;
 }
 
 /* Parses the Ethernet frame in 'skb', which was received on 'in_port',
@@ -196,9 +225,7 @@ static int is_snap(const struct eth_snap_hdr *esh)
 int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
 {
        struct ethhdr *eth;
-       struct eth_snap_hdr *esh;
        int retval = 0;
-       int nh_ofs;
 
        memset(key, 0, sizeof *key);
        key->tun_id = OVS_CB(skb)->tun_id;
@@ -207,43 +234,28 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
 
        if (skb->len < sizeof *eth)
                return 0;
-       if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
+       if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len))
                return 0;
-       }
 
        skb_reset_mac_header(skb);
-       eth = eth_hdr(skb);
-       esh = (struct eth_snap_hdr *) eth;
-       nh_ofs = sizeof *eth;
-       if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF))
-               key->dl_type = eth->h_proto;
-       else if (skb->len >= sizeof *esh && is_snap(esh)) {
-               key->dl_type = esh->ethertype;
-               nh_ofs = sizeof *esh;
-       } else {
-               key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE);
-               if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
-                       nh_ofs += sizeof(struct llc_pdu_un); 
-               }
-       }
 
-       /* Check for a VLAN tag */
-       if (key->dl_type == htons(ETH_P_8021Q) &&
-           skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
-               struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
-               key->dl_type = vh->h_vlan_encapsulated_proto;
-               key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
-               key->dl_vlan_pcp = (ntohs(vh->h_vlan_TCI) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT;
-               nh_ofs += sizeof(struct vlan_hdr);
-       }
+       /* Link layer. */
+       eth = eth_hdr(skb);
        memcpy(key->dl_src, eth->h_source, ETH_ALEN);
        memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
-       skb_set_network_header(skb, nh_ofs);
+
+       /* dl_type, dl_vlan, dl_vlan_pcp. */
+       __skb_pull(skb, 2 * ETH_ALEN);
+       if (eth->h_proto == htons(ETH_P_8021Q))
+               parse_vlan(skb, key);
+       key->dl_type = parse_ethertype(skb);
+       skb_reset_network_header(skb);
+       __skb_push(skb, skb->data - (unsigned char *)eth);
 
        /* Network layer. */
        if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
                struct iphdr *nh = ip_hdr(skb);
-               int th_ofs = nh_ofs + nh->ihl * 4;
+               int th_ofs = skb_network_offset(skb) + nh->ihl * 4;
                key->nw_src = nh->saddr;
                key->nw_dst = nh->daddr;
                key->nw_tos = nh->tos & ~INET_ECN_MASK;
index 72ee14f..d545661 100644 (file)
@@ -77,16 +77,47 @@ pull_icmp(struct ofpbuf *packet)
     return ofpbuf_try_pull(packet, ICMP_HEADER_LEN);
 }
 
-static struct eth_header *
-pull_eth(struct ofpbuf *packet) 
+static void
+parse_vlan(struct ofpbuf *b, flow_t *flow)
 {
-    return ofpbuf_try_pull(packet, ETH_HEADER_LEN);
+    struct qtag_prefix {
+        uint16_t eth_type;      /* ETH_TYPE_VLAN */
+        uint16_t tci;
+    };
+
+    if (b->size >= sizeof(struct qtag_prefix) + sizeof(uint16_t)) {
+        struct qtag_prefix *qp = ofpbuf_pull(b, sizeof *qp);
+        flow->dl_vlan = qp->tci & htons(VLAN_VID_MASK);
+        flow->dl_vlan_pcp = (ntohs(qp->tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT;
+    }
 }
 
-static struct vlan_header *
-pull_vlan(struct ofpbuf *packet)
+static uint16_t
+parse_ethertype(struct ofpbuf *b)
 {
-    return ofpbuf_try_pull(packet, VLAN_HEADER_LEN);
+    struct llc_snap_header *llc;
+    uint16_t proto;
+
+    proto = *(uint16_t *) ofpbuf_pull(b, sizeof proto);
+    if (ntohs(proto) >= ODP_DL_TYPE_ETH2_CUTOFF) {
+        return proto;
+    }
+
+    if (b->size < sizeof *llc) {
+        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+    }
+
+    llc = b->data;
+    if (llc->llc.llc_dsap != LLC_DSAP_SNAP
+        || llc->llc.llc_ssap != LLC_SSAP_SNAP
+        || llc->llc.llc_cntl != LLC_CNTL_SNAP
+        || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
+                  sizeof llc->snap.snap_org)) {
+        return htons(ODP_DL_TYPE_NOT_ETH_TYPE);
+    }
+
+    ofpbuf_pull(b, sizeof *llc);
+    return llc->snap.snap_type;
 }
 
 /* Returns 1 if 'packet' is an IP fragment, 0 otherwise.
@@ -112,109 +143,86 @@ flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port,
     packet->l4 = NULL;
     packet->l7 = NULL;
 
-    eth = pull_eth(&b);
-    if (eth) {
-        if (ntohs(eth->eth_type) >= OFP_DL_TYPE_ETH2_CUTOFF) {
-            /* This is an Ethernet II frame */
-            flow->dl_type = eth->eth_type;
-        } else {
-            /* This is an 802.2 frame */
-            struct llc_header *llc = ofpbuf_at(&b, 0, sizeof *llc);
-            struct snap_header *snap = ofpbuf_at(&b, sizeof *llc,
-                                                 sizeof *snap);
-            if (llc == NULL) {
-                return 0;
-            }
-            if (snap
-                && llc->llc_dsap == LLC_DSAP_SNAP
-                && llc->llc_ssap == LLC_SSAP_SNAP
-                && llc->llc_cntl == LLC_CNTL_SNAP
-                && !memcmp(snap->snap_org, SNAP_ORG_ETHERNET,
-                           sizeof snap->snap_org)) {
-                flow->dl_type = snap->snap_type;
-                ofpbuf_pull(&b, LLC_SNAP_HEADER_LEN);
-            } else {
-                flow->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE);
-                ofpbuf_pull(&b, sizeof(struct llc_header));
-            }
-        }
+    if (b.size < sizeof *eth) {
+        return 0;
+    }
 
-        /* Check for a VLAN tag */
-        if (flow->dl_type == htons(ETH_TYPE_VLAN)) {
-            struct vlan_header *vh = pull_vlan(&b);
-            if (vh) {
-                flow->dl_type = vh->vlan_next_type;
-                flow->dl_vlan = vh->vlan_tci & htons(VLAN_VID_MASK);
-                flow->dl_vlan_pcp = (ntohs(vh->vlan_tci) & 0xe000) >> 13;
-            }
-        }
-        memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
-        memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);
-
-        packet->l3 = b.data;
-        if (flow->dl_type == htons(ETH_TYPE_IP)) {
-            const struct ip_header *nh = pull_ip(&b);
-            if (nh) {
-                flow->nw_src = get_unaligned_u32(&nh->ip_src);
-                flow->nw_dst = get_unaligned_u32(&nh->ip_dst);
-                flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
-                flow->nw_proto = nh->ip_proto;
-                packet->l4 = b.data;
-                if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
-                    if (flow->nw_proto == IP_TYPE_TCP) {
-                        const struct tcp_header *tcp = pull_tcp(&b);
-                        if (tcp) {
-                            flow->tp_src = tcp->tcp_src;
-                            flow->tp_dst = tcp->tcp_dst;
-                            packet->l7 = b.data;
-                        } else {
-                            /* Avoid tricking other code into thinking that
-                             * this packet has an L4 header. */
-                            flow->nw_proto = 0;
-                        }
-                    } else if (flow->nw_proto == IP_TYPE_UDP) {
-                        const struct udp_header *udp = pull_udp(&b);
-                        if (udp) {
-                            flow->tp_src = udp->udp_src;
-                            flow->tp_dst = udp->udp_dst;
-                            packet->l7 = b.data;
-                        } else {
-                            /* Avoid tricking other code into thinking that
-                             * this packet has an L4 header. */
-                            flow->nw_proto = 0;
-                        }
-                    } else if (flow->nw_proto == IP_TYPE_ICMP) {
-                        const struct icmp_header *icmp = pull_icmp(&b);
-                        if (icmp) {
-                            flow->icmp_type = htons(icmp->icmp_type);
-                            flow->icmp_code = htons(icmp->icmp_code);
-                            packet->l7 = b.data;
-                        } else {
-                            /* Avoid tricking other code into thinking that
-                             * this packet has an L4 header. */
-                            flow->nw_proto = 0;
-                        }
+    /* Link layer. */
+    eth = b.data;
+    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
+    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);
+
+    /* dl_type, dl_vlan, dl_vlan_pcp. */
+    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
+    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
+        parse_vlan(&b, flow);
+    }
+    flow->dl_type = parse_ethertype(&b);
+
+    /* Network layer. */
+    packet->l3 = b.data;
+    if (flow->dl_type == htons(ETH_TYPE_IP)) {
+        const struct ip_header *nh = pull_ip(&b);
+        if (nh) {
+            flow->nw_src = get_unaligned_u32(&nh->ip_src);
+            flow->nw_dst = get_unaligned_u32(&nh->ip_dst);
+            flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
+            flow->nw_proto = nh->ip_proto;
+            packet->l4 = b.data;
+            if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
+                if (flow->nw_proto == IP_TYPE_TCP) {
+                    const struct tcp_header *tcp = pull_tcp(&b);
+                    if (tcp) {
+                        flow->tp_src = tcp->tcp_src;
+                        flow->tp_dst = tcp->tcp_dst;
+                        packet->l7 = b.data;
+                    } else {
+                        /* Avoid tricking other code into thinking that
+                         * this packet has an L4 header. */
+                        flow->nw_proto = 0;
+                    }
+                } else if (flow->nw_proto == IP_TYPE_UDP) {
+                    const struct udp_header *udp = pull_udp(&b);
+                    if (udp) {
+                        flow->tp_src = udp->udp_src;
+                        flow->tp_dst = udp->udp_dst;
+                        packet->l7 = b.data;
+                    } else {
+                        /* Avoid tricking other code into thinking that
+                         * this packet has an L4 header. */
+                        flow->nw_proto = 0;
+                    }
+                } else if (flow->nw_proto == IP_TYPE_ICMP) {
+                    const struct icmp_header *icmp = pull_icmp(&b);
+                    if (icmp) {
+                        flow->icmp_type = htons(icmp->icmp_type);
+                        flow->icmp_code = htons(icmp->icmp_code);
+                        packet->l7 = b.data;
+                    } else {
+                        /* Avoid tricking other code into thinking that
+                         * this packet has an L4 header. */
+                        flow->nw_proto = 0;
                     }
-                } else {
-                    retval = 1;
                 }
+            } else {
+                retval = 1;
+            }
+        }
+    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
+        const struct arp_eth_header *arp = pull_arp(&b);
+        if (arp && arp->ar_hrd == htons(1)
+            && arp->ar_pro == htons(ETH_TYPE_IP) 
+            && arp->ar_hln == ETH_ADDR_LEN
+            && arp->ar_pln == 4) {
+            /* We only match on the lower 8 bits of the opcode. */
+            if (ntohs(arp->ar_op) <= 0xff) {
+                flow->nw_proto = ntohs(arp->ar_op);
             }
-        } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
-            const struct arp_eth_header *arp = pull_arp(&b);
-            if (arp && arp->ar_hrd == htons(1)
-                    && arp->ar_pro == htons(ETH_TYPE_IP) 
-                    && arp->ar_hln == ETH_ADDR_LEN
-                    && arp->ar_pln == 4) {
-                /* We only match on the lower 8 bits of the opcode. */
-                if (ntohs(arp->ar_op) <= 0xff) {
-                    flow->nw_proto = ntohs(arp->ar_op);
-                }
 
-                if ((flow->nw_proto == ARP_OP_REQUEST) 
-                        || (flow->nw_proto == ARP_OP_REPLY)) {
-                    flow->nw_src = arp->ar_spa;
-                    flow->nw_dst = arp->ar_tpa;
-                }
+            if ((flow->nw_proto == ARP_OP_REQUEST) 
+                || (flow->nw_proto == ARP_OP_REPLY)) {
+                flow->nw_src = arp->ar_spa;
+                flow->nw_dst = arp->ar_tpa;
             }
         }
     }
index bbb6f11..a14faa7 100755 (executable)
@@ -111,6 +111,10 @@ sub output {
     my $packet = '';
     $packet .= pack_ethaddr($flow{DL_DST});
     $packet .= pack_ethaddr($flow{DL_SRC});
+    if ($flow{DL_VLAN} != 0xffff) {
+        $packet .= pack('nn', 0x8100, $flow{DL_VLAN});
+    }
+    my $len_ofs = length($packet);
     $packet .= pack('n', 0) if $attrs{DL_HEADER} =~ /^802.2/;
     if ($attrs{DL_HEADER} eq '802.2') {
         $packet .= pack('CCC', 0x42, 0x42, 0x03); # LLC for 802.1D STP.
@@ -119,9 +123,6 @@ sub output {
             $packet .= pack('CCC', 0xaa, 0xaa, 0x03); # LLC for SNAP.
             $packet .= pack('CCC', 0, 0, 0);          # SNAP OUI.
         }
-        if ($attrs{DL_VLAN} ne 'none') {
-            $packet .= pack('nn', 0x8100, $flow{DL_VLAN});
-        }
         $packet .= pack('n', $flow{DL_TYPE});
         if ($attrs{DL_TYPE} eq 'ip') {
             my $ip = pack('CCnnnCCnNN',
@@ -193,8 +194,11 @@ sub output {
             $packet .= $ip;
         }
     }
-    substr($packet, 12, 2) = pack('n', length($packet))
-      if $attrs{DL_HEADER} =~ /^802.2/;
+    if ($attrs{DL_HEADER} =~ /^802.2/) {
+        my $len = length ($packet);
+        $len -= 4 if $flow{DL_VLAN} != 0xffff;
+        substr($packet, $len_ofs, 2) = pack('n', $len);
+    }
 
     print join(' ', map("$_=$attrs{$_}", keys(%attrs))), "\n";
     print join(' ', map("$_=$flow{$_}", keys(%flow))), "\n";
index be401ed..3d31aae 100644 (file)
@@ -75,6 +75,7 @@ main(int argc OVS_UNUSED, char *argv[])
             printf("mismatch on packet #%d (1-based).\n", n);
             printf("Packet:\n");
             ofp_print_packet(stdout, packet->data, packet->size, packet->size);
+            ovs_hex_dump(stdout, packet->data, packet->size, 0, true);
             printf("Expected flow:\n%s\n", exp_s);
             printf("Actually extracted flow:\n%s\n", got_s);
             printf("\n");