datapath: optimize flow compare and mask functions
[sliver-openvswitch.git] / datapath / flow.c
index 2ac36b6..7a697a4 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/sctp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -54,8 +55,8 @@ static void update_range__(struct sw_flow_match *match,
                          size_t offset, size_t size, bool is_mask)
 {
        struct sw_flow_key_range *range = NULL;
-       size_t start = offset;
-       size_t end = offset + size;
+       size_t start = rounddown(offset, sizeof(long));
+       size_t end = roundup(offset + size, sizeof(long));
 
        if (!is_mask)
                range = &match->range;
@@ -82,8 +83,9 @@ static void update_range__(struct sw_flow_match *match,
        do { \
                update_range__(match, offsetof(struct sw_flow_key, field),  \
                                     sizeof((match)->key->field), is_mask); \
-               if (is_mask && match->mask != NULL) {                       \
-                       (match)->mask->key.field = value;                   \
+               if (is_mask) {                                              \
+                       if ((match)->mask)                                  \
+                               (match)->mask->key.field = value;           \
                } else {                                                    \
                        (match)->key->field = value;                        \
                }                                                           \
@@ -93,13 +95,19 @@ static void update_range__(struct sw_flow_match *match,
        do { \
                update_range__(match, offsetof(struct sw_flow_key, field),  \
                                len, is_mask);                              \
-               if (is_mask && match->mask != NULL) {                       \
-                       memcpy(&(match)->mask->key.field, value_p, len);    \
+               if (is_mask) {                                              \
+                       if ((match)->mask)                                  \
+                               memcpy(&(match)->mask->key.field, value_p, len);\
                } else {                                                    \
                        memcpy(&(match)->key->field, value_p, len);         \
                }                                                           \
        } while (0)
 
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+       return range->end - range->start;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
                    struct sw_flow_key *key,
                    struct sw_flow_mask *mask)
@@ -128,14 +136,16 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
                        | (1ULL << OVS_KEY_ATTR_IPV6)
                        | (1ULL << OVS_KEY_ATTR_TCP)
                        | (1ULL << OVS_KEY_ATTR_UDP)
+                       | (1ULL << OVS_KEY_ATTR_SCTP)
                        | (1ULL << OVS_KEY_ATTR_ICMP)
                        | (1ULL << OVS_KEY_ATTR_ICMPV6)
                        | (1ULL << OVS_KEY_ATTR_ARP)
                        | (1ULL << OVS_KEY_ATTR_ND));
 
-       if (match->key->eth.type == htons(ETH_P_802_2) &&
-           match->mask && (match->mask->key.eth.type == htons(0xffff)))
-               mask_allowed |= (1ULL << OVS_KEY_ATTR_ETHERTYPE);
+       /* Always allowed mask fields. */
+       mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
+                      | (1ULL << OVS_KEY_ATTR_IN_PORT)
+                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
 
        /* Check key attributes. */
        if (match->key->eth.type == htons(ETH_P_ARP)
@@ -157,6 +167,12 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
                        }
 
+                       if (match->key->ip.proto == IPPROTO_SCTP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
+                       }
+
                        if (match->key->ip.proto == IPPROTO_TCP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
@@ -183,6 +199,12 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
                                        mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
                        }
 
+                       if (match->key->ip.proto == IPPROTO_SCTP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
+                       }
+
                        if (match->key->ip.proto == IPPROTO_TCP) {
                                key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
                                if (match->mask && (match->mask->key.ip.proto == 0xff))
@@ -205,13 +227,19 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
                }
        }
 
-       if ((key_attrs & key_expected) != key_expected)
+       if ((key_attrs & key_expected) != key_expected) {
                /* Key attributes check failed. */
+               OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+                               key_attrs, key_expected);
                return false;
+       }
 
-       if ((mask_attrs & mask_allowed) != mask_attrs)
+       if ((mask_attrs & mask_allowed) != mask_attrs) {
                /* Mask attributes check failed. */
+               OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+                               mask_attrs, mask_allowed);
                return false;
+       }
 
        return true;
 }
@@ -272,6 +300,12 @@ static bool udphdr_ok(struct sk_buff *skb)
                                  sizeof(struct udphdr));
 }
 
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+       return pskb_may_pull(skb, skb_transport_offset(skb) +
+                                 sizeof(struct sctphdr));
+}
+
 static bool icmphdr_ok(struct sk_buff *skb)
 {
        return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -339,20 +373,20 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
                                  sizeof(struct icmp6hdr));
 }
 
-static void flow_key_mask(struct sw_flow_key *dst,
-                         const struct sw_flow_key *src,
-                         const struct sw_flow_mask *mask)
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+                      const struct sw_flow_mask *mask)
 {
-       u8 *m = (u8 *)&mask->key + mask->range.start;
-       u8 *s = (u8 *)src + mask->range.start;
-       u8 *d = (u8 *)dst + mask->range.start;
+       const long *m = (long *)((u8 *)&mask->key + mask->range.start);
+       const long *s = (long *)((u8 *)src + mask->range.start);
+       long *d = (long *)((u8 *)dst + mask->range.start);
         int i;
 
-       memset(dst, 0, sizeof(*dst));
-       for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) {
-               *d = *s & *m;
-               d++, s++, m++;
-       }
+       /* Memory outside of 'mask->range' is left unset, since further
+        * operations on 'dst' only use contents within 'mask->range'.
+        * The range is masked one 'long' at a time.
+        */
+       for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+               *d++ = *s++ & *m++;
 }
 
 #define TCP_FLAGS_OFFSET 13
@@ -420,7 +454,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
        struct flex_array *buckets;
        int i, err;
 
-       buckets = flex_array_alloc(sizeof(struct hlist_head *),
+       buckets = flex_array_alloc(sizeof(struct hlist_head),
                                   n_buckets, GFP_KERNEL);
        if (!buckets)
                return NULL;
@@ -480,7 +514,7 @@ static void __flow_tbl_destroy(struct flow_table *table)
                int ver = table->node_ver;
 
                hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-                       hlist_del_rcu(&flow->hash_node[ver]);
+                       hlist_del(&flow->hash_node[ver]);
                        ovs_flow_free(flow, false);
                }
        }
@@ -629,8 +663,7 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
        if (!flow)
                return;
 
-       ovs_sw_flow_mask_del_ref((struct sw_flow_mask __force *)flow->mask,
-                                deferred);
+       ovs_sw_flow_mask_del_ref(flow->mask, deferred);
 
        if (deferred)
                call_rcu(&flow->rcu, rcu_free_flow_callback);
@@ -792,7 +825,6 @@ invalid:
  * Ethernet header
  * @in_port: port number on which @skb was received.
  * @key: output flow key
- * @key_lenp: length of output flow key
  *
  * The caller must ensure that skb->len >= ETH_HLEN.
  *
@@ -893,6 +925,12 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                                key->ipv4.tp.src = udp->source;
                                key->ipv4.tp.dst = udp->dest;
                        }
+               } else if (key->ip.proto == IPPROTO_SCTP) {
+                       if (sctphdr_ok(skb)) {
+                               struct sctphdr *sctp = sctp_hdr(skb);
+                               key->ipv4.tp.src = sctp->source;
+                               key->ipv4.tp.dst = sctp->dest;
+                       }
                } else if (key->ip.proto == IPPROTO_ICMP) {
                        if (icmphdr_ok(skb)) {
                                struct icmphdr *icmp = icmp_hdr(skb);
@@ -955,6 +993,12 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                                key->ipv6.tp.src = udp->source;
                                key->ipv6.tp.dst = udp->dest;
                        }
+               } else if (key->ip.proto == NEXTHDR_SCTP) {
+                       if (sctphdr_ok(skb)) {
+                               struct sctphdr *sctp = sctp_hdr(skb);
+                               key->ipv6.tp.src = sctp->source;
+                               key->ipv6.tp.dst = sctp->dest;
+                       }
                } else if (key->ip.proto == NEXTHDR_ICMP) {
                        if (icmp6hdr_ok(skb)) {
                                error = parse_icmpv6(skb, key, nh_len);
@@ -967,10 +1011,16 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
        return 0;
 }
 
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
+static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
+                        int key_end)
 {
-       return jhash2((u32 *)((u8 *)key + key_start),
-                     DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
+       u32 *hash_key = (u32 *)((u8 *)key + key_start);
+       int hash_u32s = (key_end - key_start) >> 2;
+
+       /* Make sure the number of hash bytes is a multiple of u32. */
+       BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+       return jhash2(hash_key, hash_u32s, 0);
 }
 
 static int flow_key_start(const struct sw_flow_key *key)
@@ -978,35 +1028,43 @@ static int flow_key_start(const struct sw_flow_key *key)
        if (key->tun_key.ipv4_dst)
                return 0;
        else
-               return offsetof(struct sw_flow_key, phy);
+               return rounddown(offsetof(struct sw_flow_key, phy),
+                                         sizeof(long));
 }
 
 static bool __cmp_key(const struct sw_flow_key *key1,
-               const struct sw_flow_key *key2,  int key_start, int key_len)
+               const struct sw_flow_key *key2,  int key_start, int key_end)
 {
-       return !memcmp((u8 *)key1 + key_start,
-                       (u8 *)key2 + key_start, (key_len - key_start));
+       const long *cp1 = (long *)((u8 *)key1 + key_start);
+       const long *cp2 = (long *)((u8 *)key2 + key_start);
+       long diffs = 0;
+       int i;
+
+       for (i = key_start; i < key_end;  i += sizeof(long))
+               diffs |= *cp1++ ^ *cp2++;
+
+       return diffs == 0;
 }
 
-static bool __flow_cmp_key(const struct sw_flow *flow,
-               const struct sw_flow_key *key, int key_start, int key_len)
+static bool __flow_cmp_masked_key(const struct sw_flow *flow,
+               const struct sw_flow_key *key, int key_start, int key_end)
 {
-       return __cmp_key(&flow->key, key, key_start, key_len);
+       return __cmp_key(&flow->key, key, key_start, key_end);
 }
 
 static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
-                 const struct sw_flow_key *key, int key_start, int key_len)
+                 const struct sw_flow_key *key, int key_start, int key_end)
 {
-       return __cmp_key(&flow->unmasked_key, key, key_start, key_len);
+       return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
 }
 
 bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-               const struct sw_flow_key *key, int key_len)
+               const struct sw_flow_key *key, int key_end)
 {
        int key_start;
        key_start = flow_key_start(key);
 
-       return __flow_cmp_unmasked_key(flow, key, key_start, key_len);
+       return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
 
 }
 
@@ -1014,32 +1072,34 @@ struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
                                       struct sw_flow_match *match)
 {
        struct sw_flow_key *unmasked = match->key;
-       int key_len = match->range.end;
+       int key_end = match->range.end;
        struct sw_flow *flow;
 
        flow = ovs_flow_lookup(table, unmasked);
-       if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_len)))
+       if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
                flow = NULL;
 
        return flow;
 }
 
 static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
-                                   const struct sw_flow_key *flow_key,
+                                   const struct sw_flow_key *unmasked,
                                    struct sw_flow_mask *mask)
 {
        struct sw_flow *flow;
        struct hlist_head *head;
        int key_start = mask->range.start;
-       int key_len = mask->range.end;
+       int key_end = mask->range.end;
        u32 hash;
        struct sw_flow_key masked_key;
 
-       flow_key_mask(&masked_key, flow_key, mask);
-       hash = ovs_flow_hash(&masked_key, key_start, key_len);
+       ovs_flow_key_mask(&masked_key, unmasked, mask);
+       hash = ovs_flow_hash(&masked_key, key_start, key_end);
        head = find_bucket(table, hash);
        hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-               if (__flow_cmp_key(flow, &masked_key, key_start, key_len))
+               if (flow->mask == mask &&
+                   __flow_cmp_masked_key(flow, &masked_key,
+                                         key_start, key_end))
                        return flow;
        }
        return NULL;
@@ -1061,14 +1121,10 @@ struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
 }
 
 
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow,
-                        const struct sw_flow_key *key, int key_len)
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
 {
-       flow->unmasked_key = *key;
-       flow_key_mask(&flow->key, &flow->unmasked_key, ovsl_dereference(flow->mask));
-       flow->hash = ovs_flow_hash(&flow->key,
-                       ovsl_dereference(flow->mask)->range.start,
-                       ovsl_dereference(flow->mask)->range.end);
+       flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
+                       flow->mask->range.end);
        __tbl_insert(table, flow);
 }
 
@@ -1092,6 +1148,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
        [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
        [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+       [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
        [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
        [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
        [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
@@ -1126,24 +1183,33 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
                u16 type = nla_type(nla);
                int expected_len;
 
-               if (type > OVS_KEY_ATTR_MAX || attrs & (1ULL << type))
-                       return -EINVAL;
+               if (type > OVS_KEY_ATTR_MAX) {
+                       OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", type, OVS_KEY_ATTR_MAX);
+                       return -EINVAL;
+               }
 
-               expected_len = ovs_key_lens[type];
-               if (nla_len(nla) != expected_len && expected_len != -1)
+               if (attrs & (1ULL << type)) {
+                       OVS_NLERR("Duplicate key attribute (type %d).\n", type);
                        return -EINVAL;
+               }
 
-               if (attrs & (1ULL << type))
-                       /* Duplicated field. */
+               expected_len = ovs_key_lens[type];
+               if (nla_len(nla) != expected_len && expected_len != -1) {
+                       OVS_NLERR("Key attribute has unexpected length (type=%d"
+                                 ", length=%d, expected=%d).\n", type,
+                                 nla_len(nla), expected_len);
                        return -EINVAL;
+               }
 
                if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
                        attrs |= 1ULL << type;
                        a[type] = nla;
                }
        }
-       if (rem)
+       if (rem) {
+               OVS_NLERR("Message has %d unknown bytes.\n", rem);
                return -EINVAL;
+       }
 
        *attrsp = attrs;
        return 0;
@@ -1161,8 +1227,8 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
        return __parse_flow_nlattrs(attr, a, attrsp, false);
 }
 
-int ipv4_tun_from_nlattr(const struct nlattr *attr,
-                        struct sw_flow_match *match, bool is_mask)
+int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
+                            struct sw_flow_match *match, bool is_mask)
 {
        struct nlattr *a;
        int rem;
@@ -1181,9 +1247,18 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
                        [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
                };
 
-               if (type > OVS_TUNNEL_KEY_ATTR_MAX ||
-                       ovs_tunnel_key_lens[type] != nla_len(a))
+               if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+                       OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+                       type, OVS_TUNNEL_KEY_ATTR_MAX);
                        return -EINVAL;
+               }
+
+               if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+                       OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+                                 " length (type=%d, length=%d, expected=%d).\n",
+                                 type, nla_len(a), ovs_tunnel_key_lens[type]);
+                       return -EINVAL;
+               }
 
                switch (type) {
                case OVS_TUNNEL_KEY_ATTR_ID:
@@ -1221,21 +1296,29 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 
        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 
-       if (rem > 0)
+       if (rem > 0) {
+               OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
                return -EINVAL;
+       }
 
-       if (!match->key->tun_key.ipv4_dst)
-               return -EINVAL;
+       if (!is_mask) {
+               if (!match->key->tun_key.ipv4_dst) {
+                       OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+                       return -EINVAL;
+               }
 
-       if (!ttl)
-               return -EINVAL;
+               if (!ttl) {
+                       OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+                       return -EINVAL;
+               }
+       }
 
        return 0;
 }
 
-int ipv4_tun_to_nlattr(struct sk_buff *skb,
-                       const struct ovs_key_ipv4_tunnel *tun_key,
-                       const struct ovs_key_ipv4_tunnel *output)
+int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
+                          const struct ovs_key_ipv4_tunnel *tun_key,
+                          const struct ovs_key_ipv4_tunnel *output)
 {
        struct nlattr *nla;
 
@@ -1243,23 +1326,24 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
        if (!nla)
                return -EMSGSIZE;
 
-       if (tun_key->tun_flags & TUNNEL_KEY &&
+       if (output->tun_flags & TUNNEL_KEY &&
            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
                return -EMSGSIZE;
-       if (tun_key->ipv4_src &&
-           nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+       if (output->ipv4_src &&
+               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
                return -EMSGSIZE;
-       if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+       if (output->ipv4_dst &&
+               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
                return -EMSGSIZE;
-       if (tun_key->ipv4_tos &&
-           nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+       if (output->ipv4_tos &&
+               nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
                return -EMSGSIZE;
        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
                return -EMSGSIZE;
-       if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+       if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
                return -EMSGSIZE;
-       if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+       if ((output->tun_flags & TUNNEL_CSUM) &&
                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
                return -EMSGSIZE;
 
@@ -1280,23 +1364,30 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
        if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
 
-               if (!is_mask && in_port >= DP_MAX_PORTS)
+               if (is_mask)
+                       in_port = 0xffffffff; /* Always exact match in_port. */
+               else if (in_port >= DP_MAX_PORTS)
                        return -EINVAL;
+
                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
+       } else if (!is_mask) {
+               SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
        }
 
        if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) && !defined(CONFIG_NETFILTER)
-               if (!is_mask && mark != 0)
+               if (!is_mask && mark != 0) {
+                       OVS_NLERR("skb->mark must be zero on this kernel (mark=%d).\n", mark);
                        return -EINVAL;
+               }
 #endif
                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
        }
        if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
-               if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+               if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
                                        is_mask))
                        return -EINVAL;
                *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
@@ -1329,20 +1420,32 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                __be16 tci;
 
                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-               if (!is_mask)
-                       if (!(tci & htons(VLAN_TAG_PRESENT)))
-                               return -EINVAL;
+               if (!(tci & htons(VLAN_TAG_PRESENT))) {
+                       if (is_mask)
+                               OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+                       else
+                               OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+
+                       return -EINVAL;
+               }
 
                SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
-       }
+       } else if (!is_mask)
+               SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
 
        if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
                __be16 eth_type;
 
                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-               if (!is_mask && ntohs(eth_type) < ETH_P_802_3_MIN)
+               if (is_mask) {
+                       /* Always exact match EtherType. */
+                       eth_type = htons(0xffff);
+               } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+                       OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+                                       ntohs(eth_type), ETH_P_802_3_MIN);
                        return -EINVAL;
+               }
 
                SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
                attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
@@ -1354,8 +1457,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                const struct ovs_key_ipv4 *ipv4_key;
 
                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
-               if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+               if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+                       OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+                               ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
                        return -EINVAL;
+               }
                SW_FLOW_KEY_PUT(match, ip.proto,
                                ipv4_key->ipv4_proto, is_mask);
                SW_FLOW_KEY_PUT(match, ip.tos,
@@ -1375,8 +1481,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                const struct ovs_key_ipv6 *ipv6_key;
 
                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
-               if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
+               if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+                       OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+                               ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
                        return -EINVAL;
+               }
                SW_FLOW_KEY_PUT(match, ipv6.label,
                                ipv6_key->ipv6_label, is_mask);
                SW_FLOW_KEY_PUT(match, ip.proto,
@@ -1403,8 +1512,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                const struct ovs_key_arp *arp_key;
 
                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
-               if (!is_mask && (arp_key->arp_op & htons(0xff00)))
+               if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+                       OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+                                 ntohs(arp_key->arp_op));
                        return -EINVAL;
+               }
 
                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
                                arp_key->arp_sip, is_mask);
@@ -1456,6 +1568,24 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
        }
 
+       if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
+               const struct ovs_key_sctp *sctp_key;
+
+               sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+                                       sctp_key->sctp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+                                       sctp_key->sctp_dst, is_mask);
+               } else {
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+                                       sctp_key->sctp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+                                       sctp_key->sctp_dst, is_mask);
+               }
+               attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
+       }
+
        if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
                const struct ovs_key_icmp *icmp_key;
 
@@ -1526,24 +1656,36 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match,
        if (err)
                return err;
 
-       if (key_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
-               encap = a[OVS_KEY_ATTR_ENCAP];
-               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
-               if (nla_len(encap)) {
-                       __be16 eth_type = 0; /* ETH_P_8021Q */
+       if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+           (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
+           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+               __be16 tci;
 
-                       if (a[OVS_KEY_ATTR_ETHERTYPE])
-                               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+               if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
+                     (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
+                       OVS_NLERR("Invalid Vlan frame.\n");
+                       return -EINVAL;
+               }
 
-                       if  ((eth_type == htons(ETH_P_8021Q)) && (a[OVS_KEY_ATTR_VLAN])) {
-                               encap_valid = true;
-                               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-                               err = parse_flow_nlattrs(encap, a, &key_attrs);
-                       } else
-                               err = -EINVAL;
+               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+               encap = a[OVS_KEY_ATTR_ENCAP];
+               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
+               encap_valid = true;
 
+               if (tci & htons(VLAN_TAG_PRESENT)) {
+                       err = parse_flow_nlattrs(encap, a, &key_attrs);
                        if (err)
                                return err;
+               } else if (!tci) {
+                       /* Corner case for truncated 802.1Q header. */
+                       if (nla_len(encap)) {
+                               OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+                               return -EINVAL;
+                       }
+               } else {
+                       OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+                       return  -EINVAL;
                }
        }
 
@@ -1556,21 +1698,39 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match,
                if (err)
                        return err;
 
-               if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && encap_valid) {
+               if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
                        __be16 eth_type = 0;
+                       __be16 tci = 0;
+
+                       if (!encap_valid) {
+                               OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+                               return -EINVAL;
+                       }
 
                        mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
                        if (a[OVS_KEY_ATTR_ETHERTYPE])
                                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
                        if (eth_type == htons(0xffff)) {
                                mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
                                encap = a[OVS_KEY_ATTR_ENCAP];
                                err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
-                       } else
-                               err = -EINVAL;
+                               /* 'err' is overwritten below. */
+                               if (err)
+                                       return err;
+                       } else {
+                               OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+                                               ntohs(eth_type));
+                               return -EINVAL;
+                       }
 
-                       if (err)
-                               return err;
+                       if (a[OVS_KEY_ATTR_VLAN])
+                               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+                       if (!(tci & htons(VLAN_TAG_PRESENT))) {
+                               OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+                               return -EINVAL;
+                       }
                }
 
                err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
@@ -1633,27 +1790,29 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
 {
        struct ovs_key_ethernet *eth_key;
        struct nlattr *nla, *encap;
+       bool is_mask = (swkey != output);
 
-       if (swkey->phy.priority &&
-           nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+       if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
                goto nla_put_failure;
 
-       if (swkey->tun_key.ipv4_dst &&
-           ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+       if ((swkey->tun_key.ipv4_dst || is_mask) &&
+           ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
                goto nla_put_failure;
 
-       if (swkey->phy.in_port != DP_MAX_PORTS) {
-               /* Exact match upper 16 bits. */
+       if (swkey->phy.in_port == DP_MAX_PORTS) {
+               if (is_mask && (output->phy.in_port == 0xffff))
+                       if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+                               goto nla_put_failure;
+       } else {
                u16 upper_u16;
-               upper_u16 = (swkey == output) ? 0 : 0xffff;
+               upper_u16 = !is_mask ? 0 : 0xffff;
 
                if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
-                                       (upper_u16 << 16) | output->phy.in_port))
+                               (upper_u16 << 16) | output->phy.in_port))
                        goto nla_put_failure;
        }
 
-       if (swkey->phy.skb_mark &&
-           nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
+       if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
                goto nla_put_failure;
 
        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
@@ -1666,7 +1825,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
 
        if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
                __be16 eth_type;
-               eth_type = (swkey == output) ? htons(ETH_P_8021Q) : htons(0xffff) ;
+               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
                if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
                    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
                        goto nla_put_failure;
@@ -1676,12 +1835,22 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
        } else
                encap = NULL;
 
-       if ((swkey == output) && (swkey->eth.type == htons(ETH_P_802_2)))
+       if (swkey->eth.type == htons(ETH_P_802_2)) {
+               /*
+                * Ethertype 802.2 is represented in netlink by omitting
+                * OVS_KEY_ATTR_ETHERTYPE from the flow key attributes and
+                * by 0xffff in the mask attributes.  The ethertype can
+                * also be wildcarded.
+                */
+               if (is_mask && output->eth.type)
+                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+                                               output->eth.type))
+                               goto nla_put_failure;
                goto unencap;
+       }
 
-       if (output->eth.type != 0)
-               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
-                       goto nla_put_failure;
+       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+               goto nla_put_failure;
 
        if (swkey->eth.type == htons(ETH_P_IP)) {
                struct ovs_key_ipv4 *ipv4_key;
@@ -1760,6 +1929,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
                                udp_key->udp_src = output->ipv6.tp.src;
                                udp_key->udp_dst = output->ipv6.tp.dst;
                        }
+               } else if (swkey->ip.proto == IPPROTO_SCTP) {
+                       struct ovs_key_sctp *sctp_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       sctp_key = nla_data(nla); /* filled from 'output' */
+                       if (swkey->eth.type == htons(ETH_P_IP)) {
+                               sctp_key->sctp_src = output->ipv4.tp.src;
+                               sctp_key->sctp_dst = output->ipv4.tp.dst;
+                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+                               sctp_key->sctp_src = output->ipv6.tp.src;
+                               sctp_key->sctp_dst = output->ipv6.tp.dst;
+                       }
                } else if (swkey->eth.type == htons(ETH_P_IP) &&
                           swkey->ip.proto == IPPROTO_ICMP) {
                        struct ovs_key_icmp *icmp_key;
@@ -1812,6 +1995,8 @@ nla_put_failure:
  * Returns zero if successful or a negative error code. */
 int ovs_flow_init(void)
 {
+       BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
        flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
                                        0, NULL);
        if (flow_cache == NULL)
@@ -1874,7 +2059,7 @@ static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
 
        return  (a->range.end == b->range.end)
                && (a->range.start == b->range.start)
-               && (memcmp(a_, b_, ovs_sw_flow_mask_actual_size(a)) == 0);
+               && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
 }
 
 struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
@@ -1911,5 +2096,5 @@ static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
        u8 *m = (u8 *)&mask->key + range->start;
 
        mask->range = *range;
-       memset(m, val, ovs_sw_flow_mask_size_roundup(mask));
+       memset(m, val, range_n_bytes(range));
 }