Merge branch 'mainstream'
author Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Mon, 7 Oct 2013 10:34:23 +0000 (12:34 +0200)
committer Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Mon, 7 Oct 2013 10:34:23 +0000 (12:34 +0200)
85 files changed:
AUTHORS
FAQ
NEWS
datapath/Modules.mk
datapath/datapath.c
datapath/datapath.h
datapath/flow.c
datapath/flow.h
datapath/flow_netlink.c [new file with mode: 0644]
datapath/flow_netlink.h [new file with mode: 0644]
datapath/flow_table.c [new file with mode: 0644]
datapath/flow_table.h [new file with mode: 0644]
datapath/linux/.gitignore
datapath/linux/compat/vxlan.c
include/openflow/nicira-ext.h
include/openflow/openflow-common.h
lib/aes128.c
lib/automake.mk
lib/classifier.c
lib/classifier.h
lib/coverage-unixctl.man
lib/coverage.c
lib/coverage.h
lib/dpif.c
lib/flow.c
lib/flow.h
lib/jsonrpc.c
lib/jsonrpc.h
lib/lockfile.c
lib/mac-learning.c
lib/match.c
lib/match.h
lib/netlink-socket.c
lib/netlink.c
lib/netlink.h
lib/nx-match.c
lib/odp-util.c
lib/odp-util.h
lib/ofp-actions.c
lib/ofp-parse.c
lib/ofp-util.c
lib/ofp-version-opt.c
lib/process.c
lib/signals.c
lib/stream-ssl.c
lib/stream.c
lib/stream.h
lib/tag.c [new file with mode: 0644]
lib/tag.h [new file with mode: 0644]
lib/timeval.c
lib/util.h
lib/vconn-active.man
lib/vconn-passive.man
lib/vconn-stream.c
lib/vconn.c
lib/vlandev.c
lib/vlog.c
manpages.mk
ofproto/connmgr.c
ofproto/ofproto-dpif-upcall.c
ofproto/ofproto-dpif-upcall.h
ofproto/ofproto-dpif-xlate.c
ofproto/ofproto-dpif-xlate.h
ofproto/ofproto-dpif.c
ofproto/ofproto-provider.h
ofproto/ofproto.c
ovsdb/SPECS
ovsdb/log.c
ovsdb/ovsdb-server.1.in
ovsdb/ovsdb-tool.c
rhel/etc_init.d_openvswitch
tests/ofproto-dpif.at
tests/ofproto-macros.at
tests/test-bundle.c
tests/test-multipath.c
tests/test-odp.c
tests/tunnel.at
third-party/ofp-tcpdump.patch
utilities/bugtool/ovs-bugtool.8.in
utilities/bugtool/ovs-bugtool.in
utilities/ovs-controller.8.in
utilities/ovs-dpctl.c
utilities/ovs-ofctl.8.in
vswitchd/bridge.c
vswitchd/vswitch.xml

diff --git a/AUTHORS b/AUTHORS
index 63c1ef8..7892328 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,7 @@ Duffie Cooley           dcooley@nicira.com
 Ed Maste                emaste at freebsd.org
 Edward Tomasz Napierała trasz@freebsd.org
 Ethan Jackson           ethan@nicira.com
+Flavio Leitner          fbl@redhat.com
 FUJITA Tomonori         fujita.tomonori@lab.ntt.co.jp
 Gaetano Catalli         gaetano.catalli@gmail.com
 Giuseppe Lettieri       g.lettieri@iet.unipi.it
diff --git a/FAQ b/FAQ
index 5744d5a..d36495c 100644 (file)
--- a/FAQ
+++ b/FAQ
@@ -1299,6 +1299,39 @@ A: Yes, OpenFlow requires a switch to ignore attempts to send a packet
                                        2,3,4,5,6,\
                                        pop:NXM_OF_IN_PORT[]
 
+Q: My bridge br0 has host 192.168.0.1 on port 1 and host 192.168.0.2
+   on port 2.  I set up flows to forward only traffic destined to the
+   other host and drop other traffic, like this:
+
+      priority=5,in_port=1,ip,nw_dst=192.168.0.2,actions=2
+      priority=5,in_port=2,ip,nw_dst=192.168.0.1,actions=1
+      priority=0,actions=drop
+
+   But it doesn't work--I don't get any connectivity when I do this.
+   Why?
+
+A: These flows drop the ARP packets that IP hosts use to establish IP
+   connectivity over Ethernet.  To solve the problem, add flows to
+   allow ARP to pass between the hosts:
+
+      priority=5,in_port=1,arp,actions=2
+      priority=5,in_port=2,arp,actions=1
+
+   This issue can manifest other ways, too.  The following flows that
+   match on Ethernet addresses instead of IP addresses will also drop
+   ARP packets, because ARP requests are broadcast instead of being
+   directed to a specific host:
+
+      priority=5,in_port=1,dl_dst=54:00:00:00:00:02,actions=2
+      priority=5,in_port=2,dl_dst=54:00:00:00:00:01,actions=1
+      priority=0,actions=drop
+
+   The solution already described above will also work in this case.
+   It may be better to add flows to allow all multicast and broadcast
+   traffic:
+
+      priority=5,in_port=1,dl_dst=01:00:00:00:00:00/01:00:00:00:00:00,actions=2
+      priority=5,in_port=2,dl_dst=01:00:00:00:00:00/01:00:00:00:00:00,actions=1
 
 Contact 
 -------
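
Note: as a quick, illustrative sketch of the FAQ answer added above (assuming a bridge
named br0 and the same port numbering and addresses used in the question; only the two
arp rules are new), the complete rule set could be installed with ovs-ofctl like this:

      # br0, the port numbers, and the addresses come from the FAQ example above
      ovs-ofctl add-flow br0 "priority=5,in_port=1,ip,nw_dst=192.168.0.2,actions=2"
      ovs-ofctl add-flow br0 "priority=5,in_port=2,ip,nw_dst=192.168.0.1,actions=1"
      ovs-ofctl add-flow br0 "priority=5,in_port=1,arp,actions=2"
      ovs-ofctl add-flow br0 "priority=5,in_port=2,arp,actions=1"
      ovs-ofctl add-flow br0 "priority=0,actions=drop"
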
diff --git a/NEWS b/NEWS
index eae1146..94e0da9 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,9 @@
 Post-v2.0.0
 ---------------------
+   - The default OpenFlow and OVSDB ports will change to
+     IANA-assigned numbers in a future release.  Consider updating
+     your installations to specify port numbers instead of using the
+     defaults.
 
 
 v2.0.0 - xx xxx xxxx
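
Note: a minimal sketch of what the NEWS item above recommends, assuming a bridge named
br0 and a controller reachable at 192.168.0.10 (both placeholders). Pinning the port
numbers explicitly means the later move of the defaults to the IANA-assigned numbers
(6653 for OpenFlow, 6640 for OVSDB) will not silently change where the switch connects
or listens:

      # 192.168.0.10 is only an example controller address; 6633/6632 are the
      # historical defaults, spelled out explicitly instead of being implied
      ovs-vsctl set-controller br0 tcp:192.168.0.10:6633
      ovs-vsctl set-manager ptcp:6632
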
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 7ddf79c..b652411 100644 (file)
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -11,6 +11,8 @@ openvswitch_sources = \
        datapath.c \
        dp_notify.c \
        flow.c \
+       flow_netlink.c \
+       flow_table.c \
        vport.c \
        vport-gre.c \
        vport-internal_dev.c \
@@ -22,6 +24,8 @@ openvswitch_headers = \
        compat.h \
        datapath.h \
        flow.h \
+       flow_netlink.h \
+       flow_table.h \
        vlan.h \
        vport.h \
        vport-internal_dev.h \
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4defcdb..9e6df12 100644 (file)
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
 
 #include "datapath.h"
 #include "flow.h"
+#include "flow_netlink.h"
 #include "vlan.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
-#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-
 int ovs_net_id __read_mostly;
 
 static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
@@ -164,7 +163,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 {
        struct datapath *dp = container_of(rcu, struct datapath, rcu);
 
-       ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
+       ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
@@ -236,7 +235,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
        }
 
        /* Look up flow. */
-       flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
+       flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
 
@@ -435,7 +434,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
        upcall->dp_ifindex = dp_ifindex;
 
        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-       ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
+       ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);
 
        if (upcall_info->userdata)
@@ -455,398 +454,6 @@ out:
        return err;
 }
 
-/* Called with ovs_mutex. */
-static int flush_flows(struct datapath *dp)
-{
-       struct flow_table *old_table;
-       struct flow_table *new_table;
-
-       old_table = ovsl_dereference(dp->table);
-       new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
-       if (!new_table)
-               return -ENOMEM;
-
-       rcu_assign_pointer(dp->table, new_table);
-
-       ovs_flow_tbl_destroy(old_table, true);
-       return 0;
-}
-
-static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
-{
-
-       struct sw_flow_actions *acts;
-       int new_acts_size;
-       int req_size = NLA_ALIGN(attr_len);
-       int next_offset = offsetof(struct sw_flow_actions, actions) +
-                                       (*sfa)->actions_len;
-
-       if (req_size <= (ksize(*sfa) - next_offset))
-               goto out;
-
-       new_acts_size = ksize(*sfa) * 2;
-
-       if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-               if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
-                       return ERR_PTR(-EMSGSIZE);
-               new_acts_size = MAX_ACTIONS_BUFSIZE;
-       }
-
-       acts = ovs_flow_actions_alloc(new_acts_size);
-       if (IS_ERR(acts))
-               return (void *)acts;
-
-       memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
-       acts->actions_len = (*sfa)->actions_len;
-       kfree(*sfa);
-       *sfa = acts;
-
-out:
-       (*sfa)->actions_len += req_size;
-       return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
-}
-
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
-{
-       struct nlattr *a;
-
-       a = reserve_sfa_size(sfa, nla_attr_size(len));
-       if (IS_ERR(a))
-               return PTR_ERR(a);
-
-       a->nla_type = attrtype;
-       a->nla_len = nla_attr_size(len);
-
-       if (data)
-               memcpy(nla_data(a), data, len);
-       memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
-
-       return 0;
-}
-
-static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
-{
-       int used = (*sfa)->actions_len;
-       int err;
-
-       err = add_action(sfa, attrtype, NULL, 0);
-       if (err)
-               return err;
-
-       return used;
-}
-
-static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
-{
-       struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
-
-       a->nla_len = sfa->actions_len - st_offset;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
-                               const struct sw_flow_key *key, int depth,
-                               struct sw_flow_actions **sfa);
-
-static int validate_and_copy_sample(const struct nlattr *attr,
-                          const struct sw_flow_key *key, int depth,
-                          struct sw_flow_actions **sfa)
-{
-       const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
-       const struct nlattr *probability, *actions;
-       const struct nlattr *a;
-       int rem, start, err, st_acts;
-
-       memset(attrs, 0, sizeof(attrs));
-       nla_for_each_nested(a, attr, rem) {
-               int type = nla_type(a);
-               if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
-                       return -EINVAL;
-               attrs[type] = a;
-       }
-       if (rem)
-               return -EINVAL;
-
-       probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
-       if (!probability || nla_len(probability) != sizeof(u32))
-               return -EINVAL;
-
-       actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
-       if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
-               return -EINVAL;
-
-       /* validation done, copy sample action. */
-       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
-       if (start < 0)
-               return start;
-       err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
-       if (err)
-               return err;
-       st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
-       if (st_acts < 0)
-               return st_acts;
-
-       err = validate_and_copy_actions(actions, key, depth + 1, sfa);
-       if (err)
-               return err;
-
-       add_nested_action_end(*sfa, st_acts);
-       add_nested_action_end(*sfa, start);
-
-       return 0;
-}
-
-static int validate_tp_port(const struct sw_flow_key *flow_key)
-{
-       if (flow_key->eth.type == htons(ETH_P_IP)) {
-               if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
-                       return 0;
-       } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
-               if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
-                       return 0;
-       }
-
-       return -EINVAL;
-}
-
-static int validate_and_copy_set_tun(const struct nlattr *attr,
-                                    struct sw_flow_actions **sfa)
-{
-       struct sw_flow_match match;
-       struct sw_flow_key key;
-       int err, start;
-
-       ovs_match_init(&match, &key, NULL);
-       err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
-       if (err)
-               return err;
-
-       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
-       if (start < 0)
-               return start;
-
-       err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
-                       sizeof(match.key->tun_key));
-       add_nested_action_end(*sfa, start);
-
-       return err;
-}
-
-static int validate_set(const struct nlattr *a,
-                       const struct sw_flow_key *flow_key,
-                       struct sw_flow_actions **sfa,
-                       bool *set_tun)
-{
-       const struct nlattr *ovs_key = nla_data(a);
-       int key_type = nla_type(ovs_key);
-
-       /* There can be only one key in a action */
-       if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
-               return -EINVAL;
-
-       if (key_type > OVS_KEY_ATTR_MAX ||
-           (ovs_key_lens[key_type] != nla_len(ovs_key) &&
-            ovs_key_lens[key_type] != -1))
-               return -EINVAL;
-
-       switch (key_type) {
-       const struct ovs_key_ipv4 *ipv4_key;
-       const struct ovs_key_ipv6 *ipv6_key;
-       int err;
-
-       case OVS_KEY_ATTR_PRIORITY:
-       case OVS_KEY_ATTR_SKB_MARK:
-       case OVS_KEY_ATTR_ETHERNET:
-               break;
-
-       case OVS_KEY_ATTR_TUNNEL:
-               *set_tun = true;
-               err = validate_and_copy_set_tun(a, sfa);
-               if (err)
-                       return err;
-               break;
-
-       case OVS_KEY_ATTR_IPV4:
-               if (flow_key->eth.type != htons(ETH_P_IP))
-                       return -EINVAL;
-
-               if (!flow_key->ip.proto)
-                       return -EINVAL;
-
-               ipv4_key = nla_data(ovs_key);
-               if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-                       return -EINVAL;
-
-               if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-                       return -EINVAL;
-
-               break;
-
-       case OVS_KEY_ATTR_IPV6:
-               if (flow_key->eth.type != htons(ETH_P_IPV6))
-                       return -EINVAL;
-
-               if (!flow_key->ip.proto)
-                       return -EINVAL;
-
-               ipv6_key = nla_data(ovs_key);
-               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-                       return -EINVAL;
-
-               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-                       return -EINVAL;
-
-               if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
-                       return -EINVAL;
-
-               break;
-
-       case OVS_KEY_ATTR_TCP:
-               if (flow_key->ip.proto != IPPROTO_TCP)
-                       return -EINVAL;
-
-               return validate_tp_port(flow_key);
-
-       case OVS_KEY_ATTR_UDP:
-               if (flow_key->ip.proto != IPPROTO_UDP)
-                       return -EINVAL;
-
-               return validate_tp_port(flow_key);
-
-       case OVS_KEY_ATTR_SCTP:
-               if (flow_key->ip.proto != IPPROTO_SCTP)
-                       return -EINVAL;
-
-               return validate_tp_port(flow_key);
-
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int validate_userspace(const struct nlattr *attr)
-{
-       static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =   {
-               [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
-               [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
-       };
-       struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
-       int error;
-
-       error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
-                                attr, userspace_policy);
-       if (error)
-               return error;
-
-       if (!a[OVS_USERSPACE_ATTR_PID] ||
-           !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int copy_action(const struct nlattr *from,
-                     struct sw_flow_actions **sfa)
-{
-       int totlen = NLA_ALIGN(from->nla_len);
-       struct nlattr *to;
-
-       to = reserve_sfa_size(sfa, from->nla_len);
-       if (IS_ERR(to))
-               return PTR_ERR(to);
-
-       memcpy(to, from, totlen);
-       return 0;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
-                               const struct sw_flow_key *key,
-                               int depth,
-                               struct sw_flow_actions **sfa)
-{
-       const struct nlattr *a;
-       int rem, err;
-
-       if (depth >= SAMPLE_ACTION_DEPTH)
-               return -EOVERFLOW;
-
-       nla_for_each_nested(a, attr, rem) {
-               /* Expected argument lengths, (u32)-1 for variable length. */
-               static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
-                       [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
-                       [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
-                       [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
-                       [OVS_ACTION_ATTR_POP_VLAN] = 0,
-                       [OVS_ACTION_ATTR_SET] = (u32)-1,
-                       [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
-               };
-               const struct ovs_action_push_vlan *vlan;
-               int type = nla_type(a);
-               bool skip_copy;
-
-               if (type > OVS_ACTION_ATTR_MAX ||
-                   (action_lens[type] != nla_len(a) &&
-                    action_lens[type] != (u32)-1))
-                       return -EINVAL;
-
-               skip_copy = false;
-               switch (type) {
-               case OVS_ACTION_ATTR_UNSPEC:
-                       return -EINVAL;
-
-               case OVS_ACTION_ATTR_USERSPACE:
-                       err = validate_userspace(a);
-                       if (err)
-                               return err;
-                       break;
-
-               case OVS_ACTION_ATTR_OUTPUT:
-                       if (nla_get_u32(a) >= DP_MAX_PORTS)
-                               return -EINVAL;
-                       break;
-
-
-               case OVS_ACTION_ATTR_POP_VLAN:
-                       break;
-
-               case OVS_ACTION_ATTR_PUSH_VLAN:
-                       vlan = nla_data(a);
-                       if (vlan->vlan_tpid != htons(ETH_P_8021Q))
-                               return -EINVAL;
-                       if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
-                               return -EINVAL;
-                       break;
-
-               case OVS_ACTION_ATTR_SET:
-                       err = validate_set(a, key, sfa, &skip_copy);
-                       if (err)
-                               return err;
-                       break;
-
-               case OVS_ACTION_ATTR_SAMPLE:
-                       err = validate_and_copy_sample(a, key, depth, sfa);
-                       if (err)
-                               return err;
-                       skip_copy = true;
-                       break;
-
-               default:
-                       return -EINVAL;
-               }
-               if (!skip_copy) {
-                       err = copy_action(a, sfa);
-                       if (err)
-                               return err;
-               }
-       }
-
-       if (rem > 0)
-               return -EINVAL;
-
-       return 0;
-}
-
 static void clear_stats(struct sw_flow *flow)
 {
        flow->used = 0;
@@ -902,15 +509,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        if (err)
                goto err_flow_free;
 
-       err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
+       err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;
-       acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
+       acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;
 
-       err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
+       err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+                                  &flow->key, 0, &acts);
        rcu_assign_pointer(flow->sf_acts, acts);
        if (err)
                goto err_flow_free;
@@ -960,11 +568,9 @@ static struct genl_ops dp_packet_genl_ops[] = {
 
 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 {
-       struct flow_table *table;
        int i;
 
-       table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
-       stats->n_flows = ovs_flow_tbl_count(table);
+       stats->n_flows = ovs_flow_tbl_count(&dp->table);
 
        stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
@@ -1005,100 +611,6 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
 };
 
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
-{
-       const struct nlattr *a;
-       struct nlattr *start;
-       int err = 0, rem;
-
-       start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
-       if (!start)
-               return -EMSGSIZE;
-
-       nla_for_each_nested(a, attr, rem) {
-               int type = nla_type(a);
-               struct nlattr *st_sample;
-
-               switch (type) {
-               case OVS_SAMPLE_ATTR_PROBABILITY:
-                       if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
-                               return -EMSGSIZE;
-                       break;
-               case OVS_SAMPLE_ATTR_ACTIONS:
-                       st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
-                       if (!st_sample)
-                               return -EMSGSIZE;
-                       err = actions_to_attr(nla_data(a), nla_len(a), skb);
-                       if (err)
-                               return err;
-                       nla_nest_end(skb, st_sample);
-                       break;
-               }
-       }
-
-       nla_nest_end(skb, start);
-       return err;
-}
-
-static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
-{
-       const struct nlattr *ovs_key = nla_data(a);
-       int key_type = nla_type(ovs_key);
-       struct nlattr *start;
-       int err;
-
-       switch (key_type) {
-       case OVS_KEY_ATTR_IPV4_TUNNEL:
-               start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
-               if (!start)
-                       return -EMSGSIZE;
-
-               err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
-                                            nla_data(ovs_key));
-               if (err)
-                       return err;
-               nla_nest_end(skb, start);
-               break;
-       default:
-               if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
-                       return -EMSGSIZE;
-               break;
-       }
-
-       return 0;
-}
-
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
-{
-       const struct nlattr *a;
-       int rem, err;
-
-       nla_for_each_attr(a, attr, len, rem) {
-               int type = nla_type(a);
-
-               switch (type) {
-               case OVS_ACTION_ATTR_SET:
-                       err = set_action_to_attr(a, skb);
-                       if (err)
-                               return err;
-                       break;
-
-               case OVS_ACTION_ATTR_SAMPLE:
-                       err = sample_action_to_attr(a, skb);
-                       if (err)
-                               return err;
-                       break;
-               default:
-                       if (nla_put(skb, type, nla_len(a), nla_data(a)))
-                               return -EMSGSIZE;
-                       break;
-               }
-       }
-
-       return 0;
-}
-
 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 {
        return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -1135,8 +647,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        if (!nla)
                goto nla_put_failure;
 
-       err = ovs_flow_to_nlattrs(&flow->unmasked_key,
-                       &flow->unmasked_key, skb);
+       err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);
@@ -1145,7 +656,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        if (!nla)
                goto nla_put_failure;
 
-       err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
+       err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
        if (err)
                goto error;
 
@@ -1188,7 +699,8 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                sf_acts = rcu_dereference_check(flow->sf_acts,
                                                lockdep_ovsl_is_held());
 
-               err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
+               err = ovs_nla_put_actions(sf_acts->actions,
+                                         sf_acts->actions_len, skb);
                if (!err)
                        nla_nest_end(skb, start);
                else {
@@ -1243,7 +755,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
-       struct flow_table *table;
        struct sw_flow_actions *acts = NULL;
        struct sw_flow_match match;
        int error;
@@ -1254,21 +765,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                goto error;
 
        ovs_match_init(&match, &key, &mask);
-       error = ovs_match_from_nlattrs(&match,
-                       a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+       error = ovs_nla_get_match(&match,
+                                 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
        if (error)
                goto error;
 
        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
-               acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+               acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error;
 
-               ovs_flow_key_mask(&masked_key, &key, &mask);
-               error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
-                                                 &masked_key, 0, &acts);
+               ovs_flow_mask_key(&masked_key, &key, &mask);
+               error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+                                            &masked_key, 0, &acts);
                if (error) {
                        OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                        goto err_kfree;
@@ -1284,32 +795,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
        if (!dp)
                goto err_unlock_ovs;
 
-       table = ovsl_dereference(dp->table);
-
        /* Check if this is a duplicate flow */
-       flow = ovs_flow_lookup(table, &key);
+       flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (!flow) {
-               struct flow_table *new_table = NULL;
-               struct sw_flow_mask *mask_p;
-
                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
                        goto err_unlock_ovs;
 
-               /* Expand table, if necessary, to make room. */
-               if (ovs_flow_tbl_need_to_expand(table))
-                       new_table = ovs_flow_tbl_expand(table);
-               else if (time_after(jiffies, dp->last_rehash + REHASH_FLOW_INTERVAL))
-                       new_table = ovs_flow_tbl_rehash(table);
-
-               if (new_table && !IS_ERR(new_table)) {
-                       rcu_assign_pointer(dp->table, new_table);
-                       ovs_flow_tbl_destroy(table, true);
-                       table = ovsl_dereference(dp->table);
-                       dp->last_rehash = jiffies;
-               }
-
                /* Allocate flow. */
                flow = ovs_flow_alloc();
                if (IS_ERR(flow)) {
@@ -1320,25 +813,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 
                flow->key = masked_key;
                flow->unmasked_key = key;
-
-               /* Make sure mask is unique in the system */
-               mask_p = ovs_sw_flow_mask_find(table, &mask);
-               if (!mask_p) {
-                       /* Allocate a new mask if none exsits. */
-                       mask_p = ovs_sw_flow_mask_alloc();
-                       if (!mask_p)
-                               goto err_flow_free;
-                       mask_p->key = mask.key;
-                       mask_p->range = mask.range;
-                       ovs_sw_flow_mask_insert(table, mask_p);
-               }
-
-               ovs_sw_flow_mask_add_ref(mask_p);
-               flow->mask = mask_p;
                rcu_assign_pointer(flow->sf_acts, acts);
 
                /* Put flow in bucket. */
-               ovs_flow_insert(table, flow);
+               error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
+               if (error) {
+                       acts = NULL;
+                       goto err_flow_free;
+               }
 
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                                info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1359,7 +841,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 
                /* The unmasked key has to be the same for flow updates. */
                error = -EINVAL;
-               if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
+               if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
                        OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
                        goto err_unlock_ovs;
                }
@@ -1367,7 +849,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);
-               ovs_flow_deferred_free_acts(old_acts);
+               ovs_nla_free_flow_actions(old_acts);
 
                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                               info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1406,7 +888,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
-       struct flow_table *table;
        struct sw_flow_match match;
        int err;
 
@@ -1416,7 +897,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
        }
 
        ovs_match_init(&match, &key, NULL);
-       err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+       err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
        if (err)
                return err;
 
@@ -1427,9 +908,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
                goto unlock;
        }
 
-       table = ovsl_dereference(dp->table);
-       flow = ovs_flow_lookup_unmasked_key(table, &match);
-       if (!flow) {
+       flow = ovs_flow_tbl_lookup(&dp->table, &key);
+       if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
                err = -ENOENT;
                goto unlock;
        }
@@ -1456,7 +936,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
-       struct flow_table *table;
        struct sw_flow_match match;
        int err;
 
@@ -1468,18 +947,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
        }
 
        if (!a[OVS_FLOW_ATTR_KEY]) {
-               err = flush_flows(dp);
+               err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }
 
        ovs_match_init(&match, &key, NULL);
-       err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+       err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
        if (err)
                goto unlock;
 
-       table = ovsl_dereference(dp->table);
-       flow = ovs_flow_lookup_unmasked_key(table, &match);
-       if (!flow) {
+       flow = ovs_flow_tbl_lookup(&dp->table, &key);
+       if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
                err = -ENOENT;
                goto unlock;
        }
@@ -1490,7 +968,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
                goto unlock;
        }
 
-       ovs_flow_remove(table, flow);
+       ovs_flow_tbl_remove(&dp->table, flow);
 
        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
@@ -1509,8 +987,8 @@ unlock:
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+       struct table_instance *ti;
        struct datapath *dp;
-       struct flow_table *table;
 
        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1519,14 +997,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
                return -ENODEV;
        }
 
-       table = rcu_dereference(dp->table);
+       ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;
 
                bucket = cb->args[0];
                obj = cb->args[1];
-               flow = ovs_flow_dump_next(table, &bucket, &obj);
+               flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;
 
@@ -1690,9 +1168,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
        /* Allocate table. */
-       err = -ENOMEM;
-       rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
-       if (!dp->table)
+       err = ovs_flow_tbl_init(&dp->table);
+       if (err)
                goto err_free_dp;
 
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
@@ -1749,7 +1226,7 @@ err_destroy_ports_array:
 err_destroy_percpu:
        free_percpu(dp->stats_percpu);
 err_destroy_table:
-       ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
+       ovs_flow_tbl_destroy(&dp->table);
 err_free_dp:
        release_net(ovs_dp_get_net(dp));
        kfree(dp);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 4a49a7d..64920de 100644 (file)
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -28,6 +28,7 @@
 
 #include "compat.h"
 #include "flow.h"
+#include "flow_table.h"
 #include "vlan.h"
 #include "vport.h"
 
@@ -59,12 +60,11 @@ struct dp_stats_percpu {
  * struct datapath - datapath for flow-based packet switching
  * @rcu: RCU callback head for deferred destruction.
  * @list_node: Element in global 'dps' list.
- * @table: Current flow table.  Protected by ovs_mutex and RCU.
+ * @table: flow table.
  * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
  * ovs_mutex and RCU.
  * @stats_percpu: Per-CPU datapath statistics.
  * @net: Reference to net namespace.
- * @last_rehash: Timestamp of last rehash.
  *
  * Context: See the comment on locking at the top of datapath.c for additional
  * locking information.
@@ -74,7 +74,7 @@ struct datapath {
        struct list_head list_node;
 
        /* Flow table. */
-       struct flow_table __rcu *table;
+       struct flow_table table;
 
        /* Switch ports. */
        struct hlist_head *ports;
@@ -86,7 +86,6 @@ struct datapath {
        /* Network namespace ref. */
        struct net *net;
 #endif
-       unsigned long last_rehash;
 };
 
 /**
diff --git a/datapath/flow.c b/datapath/flow.c
index 29122af..faa4e15 100644 (file)
--- a/datapath/flow.c
+++ b/datapath/flow.c
 
 #include "vlan.h"
 
-static struct kmem_cache *flow_cache;
-
-static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
-               struct sw_flow_key_range *range, u8 val);
-
-static void update_range__(struct sw_flow_match *match,
-                         size_t offset, size_t size, bool is_mask)
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
 {
-       struct sw_flow_key_range *range = NULL;
-       size_t start = rounddown(offset, sizeof(long));
-       size_t end = roundup(offset + size, sizeof(long));
-
-       if (!is_mask)
-               range = &match->range;
-       else if (match->mask)
-               range = &match->mask->range;
-
-       if (!range)
-               return;
-
-       if (range->start == range->end) {
-               range->start = start;
-               range->end = end;
-               return;
-       }
-
-       if (range->start > start)
-               range->start = start;
+       struct timespec cur_ts;
+       u64 cur_ms, idle_ms;
 
-       if (range->end < end)
-               range->end = end;
-}
+       ktime_get_ts(&cur_ts);
+       idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+       cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+                cur_ts.tv_nsec / NSEC_PER_MSEC;
 
-#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
-       do { \
-               update_range__(match, offsetof(struct sw_flow_key, field),  \
-                                    sizeof((match)->key->field), is_mask); \
-               if (is_mask) {                                              \
-                       if ((match)->mask)                                  \
-                               (match)->mask->key.field = value;           \
-               } else {                                                    \
-                       (match)->key->field = value;                        \
-               }                                                           \
-       } while (0)
-
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
-       do { \
-               update_range__(match, offsetof(struct sw_flow_key, field),  \
-                               len, is_mask);                              \
-               if (is_mask) {                                              \
-                       if ((match)->mask)                                  \
-                               memcpy(&(match)->mask->key.field, value_p, len);\
-               } else {                                                    \
-                       memcpy(&(match)->key->field, value_p, len);         \
-               }                                                           \
-       } while (0)
-
-static u16 range_n_bytes(const struct sw_flow_key_range *range)
-{
-       return range->end - range->start;
+       return cur_ms - idle_ms;
 }
 
-void ovs_match_init(struct sw_flow_match *match,
-                   struct sw_flow_key *key,
-                   struct sw_flow_mask *mask)
-{
-       memset(match, 0, sizeof(*match));
-       match->key = key;
-       match->mask = mask;
-
-       memset(key, 0, sizeof(*key));
-
-       if (mask) {
-               memset(&mask->key, 0, sizeof(mask->key));
-               mask->range.start = mask->range.end = 0;
-       }
-}
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
 
-static bool ovs_match_validate(const struct sw_flow_match *match,
-               u64 key_attrs, u64 mask_attrs)
+void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
 {
-       u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
-       u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
-
-       /* The following mask attributes allowed only if they
-        * pass the validation tests. */
-       mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
-                       | (1ULL << OVS_KEY_ATTR_IPV6)
-                       | (1ULL << OVS_KEY_ATTR_TCP)
-                       | (1ULL << OVS_KEY_ATTR_UDP)
-                       | (1ULL << OVS_KEY_ATTR_SCTP)
-                       | (1ULL << OVS_KEY_ATTR_ICMP)
-                       | (1ULL << OVS_KEY_ATTR_ICMPV6)
-                       | (1ULL << OVS_KEY_ATTR_ARP)
-                       | (1ULL << OVS_KEY_ATTR_ND));
-
-       /* Always allowed mask fields. */
-       mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
-                      | (1ULL << OVS_KEY_ATTR_IN_PORT)
-                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
-
-       /* Check key attributes. */
-       if (match->key->eth.type == htons(ETH_P_ARP)
-                       || match->key->eth.type == htons(ETH_P_RARP)) {
-               key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
-               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
-                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
-       }
-
-       if (match->key->eth.type == htons(ETH_P_IP)) {
-               key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
-               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
-                       mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
-
-               if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
-                       if (match->key->ip.proto == IPPROTO_UDP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_SCTP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_TCP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_ICMP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
-                       }
-               }
-       }
-
-       if (match->key->eth.type == htons(ETH_P_IPV6)) {
-               key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
-               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
-                       mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
-
-               if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
-                       if (match->key->ip.proto == IPPROTO_UDP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_SCTP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_TCP) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
-                       }
-
-                       if (match->key->ip.proto == IPPROTO_ICMPV6) {
-                               key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
-                               if (match->mask && (match->mask->key.ip.proto == 0xff))
-                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
-
-                               if (match->key->ipv6.tp.src ==
-                                               htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-                                   match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
-                                       key_expected |= 1ULL << OVS_KEY_ATTR_ND;
-                                       if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
-                                               mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
-                               }
-                       }
-               }
-       }
-
-       if ((key_attrs & key_expected) != key_expected) {
-               /* Key attributes check failed. */
-               OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
-                               key_attrs, key_expected);
-               return false;
-       }
+       u8 tcp_flags = 0;
 
-       if ((mask_attrs & mask_allowed) != mask_attrs) {
-               /* Mask attributes check failed. */
-               OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
-                               mask_attrs, mask_allowed);
-               return false;
+       if ((flow->key.eth.type == htons(ETH_P_IP) ||
+            flow->key.eth.type == htons(ETH_P_IPV6)) &&
+           flow->key.ip.proto == IPPROTO_TCP &&
+           likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
+               u8 *tcp = (u8 *)tcp_hdr(skb);
+               tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
        }
 
-       return true;
+       spin_lock(&flow->lock);
+       flow->used = jiffies;
+       flow->packet_count++;
+       flow->byte_count += skb->len;
+       flow->tcp_flags |= tcp_flags;
+       spin_unlock(&flow->lock);
 }
 
 static int check_header(struct sk_buff *skb, int len)
@@ -312,19 +150,6 @@ static bool icmphdr_ok(struct sk_buff *skb)
                                  sizeof(struct icmphdr));
 }
 
-u64 ovs_flow_used_time(unsigned long flow_jiffies)
-{
-       struct timespec cur_ts;
-       u64 cur_ms, idle_ms;
-
-       ktime_get_ts(&cur_ts);
-       idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
-       cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
-                cur_ts.tv_nsec / NSEC_PER_MSEC;
-
-       return cur_ms - idle_ms;
-}
-
 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 {
        unsigned int nh_ofs = skb_network_offset(skb);
@@ -373,319 +198,6 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
                                  sizeof(struct icmp6hdr));
 }
 
-void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
-                      const struct sw_flow_mask *mask)
-{
-       const long *m = (long *)((u8 *)&mask->key + mask->range.start);
-       const long *s = (long *)((u8 *)src + mask->range.start);
-       long *d = (long *)((u8 *)dst + mask->range.start);
-       int i;
-
-       /* The memory outside of the 'mask->range' are not set since
-        * further operations on 'dst' only uses contents within
-        * 'mask->range'.
-        */
-       for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
-               *d++ = *s++ & *m++;
-}
-
-#define TCP_FLAGS_OFFSET 13
-#define TCP_FLAG_MASK 0x3f
-
-void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
-{
-       u8 tcp_flags = 0;
-
-       if ((flow->key.eth.type == htons(ETH_P_IP) ||
-            flow->key.eth.type == htons(ETH_P_IPV6)) &&
-           flow->key.ip.proto == IPPROTO_TCP &&
-           likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
-               u8 *tcp = (u8 *)tcp_hdr(skb);
-               tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
-       }
-
-       spin_lock(&flow->lock);
-       flow->used = jiffies;
-       flow->packet_count++;
-       flow->byte_count += skb->len;
-       flow->tcp_flags |= tcp_flags;
-       spin_unlock(&flow->lock);
-}
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int size)
-{
-       struct sw_flow_actions *sfa;
-
-       if (size > MAX_ACTIONS_BUFSIZE)
-               return ERR_PTR(-EINVAL);
-
-       sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
-       if (!sfa)
-               return ERR_PTR(-ENOMEM);
-
-       sfa->actions_len = 0;
-       return sfa;
-}
-
-struct sw_flow *ovs_flow_alloc(void)
-{
-       struct sw_flow *flow;
-
-       flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
-       if (!flow)
-               return ERR_PTR(-ENOMEM);
-
-       spin_lock_init(&flow->lock);
-       flow->sf_acts = NULL;
-       flow->mask = NULL;
-
-       return flow;
-}
-
-static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
-{
-       hash = jhash_1word(hash, table->hash_seed);
-       return flex_array_get(table->buckets,
-                               (hash & (table->n_buckets - 1)));
-}
-
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
-       struct flex_array *buckets;
-       int i, err;
-
-       buckets = flex_array_alloc(sizeof(struct hlist_head),
-                                  n_buckets, GFP_KERNEL);
-       if (!buckets)
-               return NULL;
-
-       err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
-       if (err) {
-               flex_array_free(buckets);
-               return NULL;
-       }
-
-       for (i = 0; i < n_buckets; i++)
-               INIT_HLIST_HEAD((struct hlist_head *)
-                                       flex_array_get(buckets, i));
-
-       return buckets;
-}
-
-static void free_buckets(struct flex_array *buckets)
-{
-       flex_array_free(buckets);
-}
-
-static struct flow_table *__flow_tbl_alloc(int new_size)
-{
-       struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
-
-       if (!table)
-               return NULL;
-
-       table->buckets = alloc_buckets(new_size);
-
-       if (!table->buckets) {
-               kfree(table);
-               return NULL;
-       }
-       table->n_buckets = new_size;
-       table->count = 0;
-       table->node_ver = 0;
-       table->keep_flows = false;
-       get_random_bytes(&table->hash_seed, sizeof(u32));
-       table->mask_list = NULL;
-
-       return table;
-}
-
-static void __flow_tbl_destroy(struct flow_table *table)
-{
-       int i;
-
-       if (table->keep_flows)
-               goto skip_flows;
-
-       for (i = 0; i < table->n_buckets; i++) {
-               struct sw_flow *flow;
-               struct hlist_head *head = flex_array_get(table->buckets, i);
-               struct hlist_node *n;
-               int ver = table->node_ver;
-
-               hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-                       hlist_del(&flow->hash_node[ver]);
-                       ovs_flow_free(flow, false);
-               }
-       }
-
-       BUG_ON(!list_empty(table->mask_list));
-       kfree(table->mask_list);
-
-skip_flows:
-       free_buckets(table->buckets);
-       kfree(table);
-}
-
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
-{
-       struct flow_table *table = __flow_tbl_alloc(new_size);
-
-       if (!table)
-               return NULL;
-
-       table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
-       if (!table->mask_list) {
-               table->keep_flows = true;
-               __flow_tbl_destroy(table);
-               return NULL;
-       }
-       INIT_LIST_HEAD(table->mask_list);
-
-       return table;
-}
-
-static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
-{
-       struct flow_table *table = container_of(rcu, struct flow_table, rcu);
-
-       __flow_tbl_destroy(table);
-}
-
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
-{
-       if (!table)
-               return;
-
-       if (deferred)
-               call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
-       else
-               __flow_tbl_destroy(table);
-}
-
-struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
-{
-       struct sw_flow *flow;
-       struct hlist_head *head;
-       int ver;
-       int i;
-
-       ver = table->node_ver;
-       while (*bucket < table->n_buckets) {
-               i = 0;
-               head = flex_array_get(table->buckets, *bucket);
-               hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
-                       if (i < *last) {
-                               i++;
-                               continue;
-                       }
-                       *last = i + 1;
-                       return flow;
-               }
-               (*bucket)++;
-               *last = 0;
-       }
-
-       return NULL;
-}
-
-static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
-{
-       struct hlist_head *head;
-
-       head = find_bucket(table, flow->hash);
-       hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
-
-       table->count++;
-}
-
-static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
-{
-       int old_ver;
-       int i;
-
-       old_ver = old->node_ver;
-       new->node_ver = !old_ver;
-
-       /* Insert in new table. */
-       for (i = 0; i < old->n_buckets; i++) {
-               struct sw_flow *flow;
-               struct hlist_head *head;
-
-               head = flex_array_get(old->buckets, i);
-
-               hlist_for_each_entry(flow, head, hash_node[old_ver])
-                       __tbl_insert(new, flow);
-       }
-
-       new->mask_list = old->mask_list;
-       old->keep_flows = true;
-}
-
-static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
-{
-       struct flow_table *new_table;
-
-       new_table = __flow_tbl_alloc(n_buckets);
-       if (!new_table)
-               return ERR_PTR(-ENOMEM);
-
-       flow_table_copy_flows(table, new_table);
-
-       return new_table;
-}
-
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
-{
-       return __flow_tbl_rehash(table, table->n_buckets);
-}
-
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
-{
-       return __flow_tbl_rehash(table, table->n_buckets * 2);
-}
-
-static void __flow_free(struct sw_flow *flow)
-{
-       kfree((struct sf_flow_acts __force *)flow->sf_acts);
-       kmem_cache_free(flow_cache, flow);
-}
-
-static void rcu_free_flow_callback(struct rcu_head *rcu)
-{
-       struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-
-       __flow_free(flow);
-}
-
-void ovs_flow_free(struct sw_flow *flow, bool deferred)
-{
-       if (!flow)
-               return;
-
-       ovs_sw_flow_mask_del_ref(flow->mask, deferred);
-
-       if (deferred)
-               call_rcu(&flow->rcu, rcu_free_flow_callback);
-       else
-               __flow_free(flow);
-}
-
-/* RCU callback used by ovs_flow_deferred_free_acts. */
-static void rcu_free_acts_callback(struct rcu_head *rcu)
-{
-       struct sw_flow_actions *sf_acts = container_of(rcu,
-                       struct sw_flow_actions, rcu);
-       kfree(sf_acts);
-}
-
-/* Schedules 'sf_acts' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
-{
-       call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
-}
-
 static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
        struct qtag_prefix {
@@ -1010,1088 +522,3 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
        return 0;
 }
-
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
-                        int key_end)
-{
-       u32 *hash_key = (u32 *)((u8 *)key + key_start);
-       int hash_u32s = (key_end - key_start) >> 2;
-
-       /* Make sure number of hash bytes are multiple of u32. */
-       BUILD_BUG_ON(sizeof(long) % sizeof(u32));
-
-       return jhash2(hash_key, hash_u32s, 0);
-}
-
-static int flow_key_start(const struct sw_flow_key *key)
-{
-       if (key->tun_key.ipv4_dst)
-               return 0;
-       else
-               return rounddown(offsetof(struct sw_flow_key, phy),
-                                         sizeof(long));
-}
-
-static bool __cmp_key(const struct sw_flow_key *key1,
-               const struct sw_flow_key *key2,  int key_start, int key_end)
-{
-       const long *cp1 = (long *)((u8 *)key1 + key_start);
-       const long *cp2 = (long *)((u8 *)key2 + key_start);
-       long diffs = 0;
-       int i;
-
-       for (i = key_start; i < key_end;  i += sizeof(long))
-               diffs |= *cp1++ ^ *cp2++;
-
-       return diffs == 0;
-}
-
-static bool __flow_cmp_masked_key(const struct sw_flow *flow,
-               const struct sw_flow_key *key, int key_start, int key_end)
-{
-       return __cmp_key(&flow->key, key, key_start, key_end);
-}
-
-static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
-                 const struct sw_flow_key *key, int key_start, int key_end)
-{
-       return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
-}
-
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-               const struct sw_flow_key *key, int key_end)
-{
-       int key_start;
-       key_start = flow_key_start(key);
-
-       return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
-
-}
-
-struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
-                                      struct sw_flow_match *match)
-{
-       struct sw_flow_key *unmasked = match->key;
-       int key_end = match->range.end;
-       struct sw_flow *flow;
-
-       flow = ovs_flow_lookup(table, unmasked);
-       if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
-               flow = NULL;
-
-       return flow;
-}
-
-static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
-                                   const struct sw_flow_key *unmasked,
-                                   struct sw_flow_mask *mask)
-{
-       struct sw_flow *flow;
-       struct hlist_head *head;
-       int key_start = mask->range.start;
-       int key_end = mask->range.end;
-       u32 hash;
-       struct sw_flow_key masked_key;
-
-       ovs_flow_key_mask(&masked_key, unmasked, mask);
-       hash = ovs_flow_hash(&masked_key, key_start, key_end);
-       head = find_bucket(table, hash);
-       hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-               if (flow->mask == mask &&
-                   __flow_cmp_masked_key(flow, &masked_key,
-                                         key_start, key_end))
-                       return flow;
-       }
-       return NULL;
-}
-
-struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
-                               const struct sw_flow_key *key)
-{
-       struct sw_flow *flow = NULL;
-       struct sw_flow_mask *mask;
-
-       list_for_each_entry_rcu(mask, tbl->mask_list, list) {
-               flow = ovs_masked_flow_lookup(tbl, key, mask);
-               if (flow)  /* Found */
-                       break;
-       }
-
-       return flow;
-}
-
-
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
-{
-       flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
-                       flow->mask->range.end);
-       __tbl_insert(table, flow);
-}
-
-void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
-{
-       BUG_ON(table->count == 0);
-       hlist_del_rcu(&flow->hash_node[table->node_ver]);
-       table->count--;
-}
-
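The lookup path above works per mask: for each mask on the table's mask list, the packet key is ANDed with the mask over the mask's byte range, that masked range is hashed, and candidate flows in the bucket are compared long-by-long. A stand-alone sketch of the masking and comparison steps, with hypothetical toy_* types standing in for sw_flow_key and sw_flow_mask:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct toy_key { unsigned long w[4]; };                  /* stand-in for sw_flow_key */
    struct toy_mask { size_t start, end; struct toy_key key; };

    /* AND 'src' with the mask over [start, end), writing only that range,
     * in the spirit of ovs_flow_key_mask(); start/end are long-aligned. */
    void toy_key_mask(struct toy_key *dst, const struct toy_key *src,
                      const struct toy_mask *mask)
    {
            const long *m = (const long *)((const uint8_t *)&mask->key + mask->start);
            const long *s = (const long *)((const uint8_t *)src + mask->start);
            long *d = (long *)((uint8_t *)dst + mask->start);
            size_t i;

            for (i = mask->start; i < mask->end; i += sizeof(long))
                    *d++ = *s++ & *m++;
    }

    /* Long-at-a-time comparison over the same range, as __cmp_key() does. */
    bool toy_cmp_masked(const struct toy_key *a, const struct toy_key *b,
                        size_t start, size_t end)
    {
            const long *pa = (const long *)((const uint8_t *)a + start);
            const long *pb = (const long *)((const uint8_t *)b + start);
            long diffs = 0;
            size_t i;

            for (i = start; i < end; i += sizeof(long))
                    diffs |= *pa++ ^ *pb++;
            return diffs == 0;
    }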
-/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
-const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
-       [OVS_KEY_ATTR_ENCAP] = -1,
-       [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
-       [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
-       [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
-       [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
-       [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
-       [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
-       [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
-       [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
-       [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
-       [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
-       [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
-       [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
-       [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
-       [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
-       [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
-       [OVS_KEY_ATTR_TUNNEL] = -1,
-};
-
-static bool is_all_zero(const u8 *fp, size_t size)
-{
-       int i;
-
-       if (!fp)
-               return false;
-
-       for (i = 0; i < size; i++)
-               if (fp[i])
-                       return false;
-
-       return true;
-}
-
-static int __parse_flow_nlattrs(const struct nlattr *attr,
-                             const struct nlattr *a[],
-                             u64 *attrsp, bool nz)
-{
-       const struct nlattr *nla;
-       u64 attrs;
-       int rem;
-
-       attrs = *attrsp;
-       nla_for_each_nested(nla, attr, rem) {
-               u16 type = nla_type(nla);
-               int expected_len;
-
-               if (type > OVS_KEY_ATTR_MAX) {
-                       OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
-                                 type, OVS_KEY_ATTR_MAX);
-                       return -EINVAL;
-               }
-
-               if (attrs & (1ULL << type)) {
-                       OVS_NLERR("Duplicate key attribute (type %d).\n", type);
-                       return -EINVAL;
-               }
-
-               expected_len = ovs_key_lens[type];
-               if (nla_len(nla) != expected_len && expected_len != -1) {
-                       OVS_NLERR("Key attribute has unexpected length (type=%d"
-                                 ", length=%d, expected=%d).\n", type,
-                                 nla_len(nla), expected_len);
-                       return -EINVAL;
-               }
-
-               if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
-                       attrs |= 1ULL << type;
-                       a[type] = nla;
-               }
-       }
-       if (rem) {
-               OVS_NLERR("Message has %d unknown bytes.\n", rem);
-               return -EINVAL;
-       }
-
-       *attrsp = attrs;
-       return 0;
-}
-
-static int parse_flow_mask_nlattrs(const struct nlattr *attr,
-                             const struct nlattr *a[], u64 *attrsp)
-{
-       return __parse_flow_nlattrs(attr, a, attrsp, true);
-}
-
-static int parse_flow_nlattrs(const struct nlattr *attr,
-                             const struct nlattr *a[], u64 *attrsp)
-{
-       return __parse_flow_nlattrs(attr, a, attrsp, false);
-}
-
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
-                            struct sw_flow_match *match, bool is_mask)
-{
-       struct nlattr *a;
-       int rem;
-       bool ttl = false;
-       __be16 tun_flags = 0;
-
-       nla_for_each_nested(a, attr, rem) {
-               int type = nla_type(a);
-               static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
-                       [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
-                       [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
-                       [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
-                       [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
-                       [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
-                       [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
-                       [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
-               };
-
-               if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
-                       OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
-                       type, OVS_TUNNEL_KEY_ATTR_MAX);
-                       return -EINVAL;
-               }
-
-               if (ovs_tunnel_key_lens[type] != nla_len(a)) {
-                       OVS_NLERR("IPv4 tunnel attribute type has unexpected "
-                                 " length (type=%d, length=%d, expected=%d).\n",
-                                 type, nla_len(a), ovs_tunnel_key_lens[type]);
-                       return -EINVAL;
-               }
-
-               switch (type) {
-               case OVS_TUNNEL_KEY_ATTR_ID:
-                       SW_FLOW_KEY_PUT(match, tun_key.tun_id,
-                                       nla_get_be64(a), is_mask);
-                       tun_flags |= TUNNEL_KEY;
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
-                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
-                                       nla_get_be32(a), is_mask);
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
-                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
-                                       nla_get_be32(a), is_mask);
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_TOS:
-                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
-                                       nla_get_u8(a), is_mask);
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_TTL:
-                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
-                                       nla_get_u8(a), is_mask);
-                       ttl = true;
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
-                       tun_flags |= TUNNEL_DONT_FRAGMENT;
-                       break;
-               case OVS_TUNNEL_KEY_ATTR_CSUM:
-                       tun_flags |= TUNNEL_CSUM;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-       }
-
-       SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
-
-       if (rem > 0) {
-               OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
-               return -EINVAL;
-       }
-
-       if (!is_mask) {
-               if (!match->key->tun_key.ipv4_dst) {
-                       OVS_NLERR("IPv4 tunnel destination address is zero.\n");
-                       return -EINVAL;
-               }
-
-               if (!ttl) {
-                       OVS_NLERR("IPv4 tunnel TTL not specified.\n");
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
-                          const struct ovs_key_ipv4_tunnel *tun_key,
-                          const struct ovs_key_ipv4_tunnel *output)
-{
-       struct nlattr *nla;
-
-       nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
-       if (!nla)
-               return -EMSGSIZE;
-
-       if (output->tun_flags & TUNNEL_KEY &&
-           nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
-               return -EMSGSIZE;
-       if (output->ipv4_src &&
-               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
-               return -EMSGSIZE;
-       if (output->ipv4_dst &&
-               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
-               return -EMSGSIZE;
-       if (output->ipv4_tos &&
-               nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
-               return -EMSGSIZE;
-       if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
-               return -EMSGSIZE;
-       if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
-               nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
-               return -EMSGSIZE;
-       if ((output->tun_flags & TUNNEL_CSUM) &&
-               nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
-               return -EMSGSIZE;
-
-       nla_nest_end(skb, nla);
-       return 0;
-}
-
-
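ovs_ipv4_tun_to_nlattr() above follows the usual kernel pattern for nested netlink attributes: open a nest with nla_nest_start(), emit the member attributes, and close it with nla_nest_end(). A minimal kernel-style sketch of that pattern, emitting just two members (error handling kept as terse as in the code above; the attribute constants come from the OVS uapi header):

    #include <net/netlink.h>

    static int toy_put_tunnel(struct sk_buff *skb, __be32 dst, u8 ttl)
    {
            struct nlattr *nest = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);

            if (!nest)
                    return -EMSGSIZE;
            if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, dst) ||
                nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, ttl))
                    return -EMSGSIZE;
            nla_nest_end(skb, nest);
            return 0;
    }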
-static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
-               const struct nlattr **a, bool is_mask)
-{
-       if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
-               SW_FLOW_KEY_PUT(match, phy.priority,
-                         nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
-               *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
-       }
-
-       if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
-               u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
-
-               if (is_mask)
-                       in_port = 0xffffffff; /* Always exact match in_port. */
-               else if (in_port >= DP_MAX_PORTS)
-                       return -EINVAL;
-
-               SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
-               *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
-       } else if (!is_mask) {
-               SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
-       }
-
-       if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
-               uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
-
-               SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
-               *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
-       }
-       if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
-               if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
-                                       is_mask))
-                       return -EINVAL;
-               *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
-       }
-       return 0;
-}
-
-static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
-               const struct nlattr **a, bool is_mask)
-{
-       int err;
-       u64 orig_attrs = attrs;
-
-       err = metadata_from_nlattrs(match, &attrs, a, is_mask);
-       if (err)
-               return err;
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
-               const struct ovs_key_ethernet *eth_key;
-
-               eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
-               SW_FLOW_KEY_MEMCPY(match, eth.src,
-                               eth_key->eth_src, ETH_ALEN, is_mask);
-               SW_FLOW_KEY_MEMCPY(match, eth.dst,
-                               eth_key->eth_dst, ETH_ALEN, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
-               __be16 tci;
-
-               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-               if (!(tci & htons(VLAN_TAG_PRESENT))) {
-                       if (is_mask)
-                               OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
-                       else
-                               OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
-
-                       return -EINVAL;
-               }
-
-               SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
-       } else if (!is_mask)
-               SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
-               __be16 eth_type;
-
-               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-               if (is_mask) {
-                       /* Always exact match EtherType. */
-                       eth_type = htons(0xffff);
-               } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
-                       OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
-                                       ntohs(eth_type), ETH_P_802_3_MIN);
-                       return -EINVAL;
-               }
-
-               SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-       } else if (!is_mask) {
-               SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
-               const struct ovs_key_ipv4 *ipv4_key;
-
-               ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
-               if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
-                       OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
-                               ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
-                       return -EINVAL;
-               }
-               SW_FLOW_KEY_PUT(match, ip.proto,
-                               ipv4_key->ipv4_proto, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.tos,
-                               ipv4_key->ipv4_tos, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.ttl,
-                               ipv4_key->ipv4_ttl, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.frag,
-                               ipv4_key->ipv4_frag, is_mask);
-               SW_FLOW_KEY_PUT(match, ipv4.addr.src,
-                               ipv4_key->ipv4_src, is_mask);
-               SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
-                               ipv4_key->ipv4_dst, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
-               const struct ovs_key_ipv6 *ipv6_key;
-
-               ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
-               if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
-                       OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
-                               ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
-                       return -EINVAL;
-               }
-               SW_FLOW_KEY_PUT(match, ipv6.label,
-                               ipv6_key->ipv6_label, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.proto,
-                               ipv6_key->ipv6_proto, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.tos,
-                               ipv6_key->ipv6_tclass, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.ttl,
-                               ipv6_key->ipv6_hlimit, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.frag,
-                               ipv6_key->ipv6_frag, is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
-                               ipv6_key->ipv6_src,
-                               sizeof(match->key->ipv6.addr.src),
-                               is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
-                               ipv6_key->ipv6_dst,
-                               sizeof(match->key->ipv6.addr.dst),
-                               is_mask);
-
-               attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
-               const struct ovs_key_arp *arp_key;
-
-               arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
-               if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
-                       OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
-                                 arp_key->arp_op);
-                       return -EINVAL;
-               }
-
-               SW_FLOW_KEY_PUT(match, ipv4.addr.src,
-                               arp_key->arp_sip, is_mask);
-               SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
-                       arp_key->arp_tip, is_mask);
-               SW_FLOW_KEY_PUT(match, ip.proto,
-                               ntohs(arp_key->arp_op), is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
-                               arp_key->arp_sha, ETH_ALEN, is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
-                               arp_key->arp_tha, ETH_ALEN, is_mask);
-
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
-               const struct ovs_key_tcp *tcp_key;
-
-               tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-                                       tcp_key->tcp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-                                       tcp_key->tcp_dst, is_mask);
-               } else {
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-                                       tcp_key->tcp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-                                       tcp_key->tcp_dst, is_mask);
-               }
-               attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
-               const struct ovs_key_udp *udp_key;
-
-               udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-                                       udp_key->udp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-                                       udp_key->udp_dst, is_mask);
-               } else {
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-                                       udp_key->udp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-                                       udp_key->udp_dst, is_mask);
-               }
-               attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
-               const struct ovs_key_sctp *sctp_key;
-
-               sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
-               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-                                       sctp_key->sctp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-                                       sctp_key->sctp_dst, is_mask);
-               } else {
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-                                       sctp_key->sctp_src, is_mask);
-                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-                                       sctp_key->sctp_dst, is_mask);
-               }
-               attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
-               const struct ovs_key_icmp *icmp_key;
-
-               icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
-               SW_FLOW_KEY_PUT(match, ipv4.tp.src,
-                               htons(icmp_key->icmp_type), is_mask);
-               SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
-                               htons(icmp_key->icmp_code), is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
-               const struct ovs_key_icmpv6 *icmpv6_key;
-
-               icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
-               SW_FLOW_KEY_PUT(match, ipv6.tp.src,
-                               htons(icmpv6_key->icmpv6_type), is_mask);
-               SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
-                               htons(icmpv6_key->icmpv6_code), is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
-       }
-
-       if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
-               const struct ovs_key_nd *nd_key;
-
-               nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
-               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
-                       nd_key->nd_target,
-                       sizeof(match->key->ipv6.nd.target),
-                       is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
-                       nd_key->nd_sll, ETH_ALEN, is_mask);
-               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
-                               nd_key->nd_tll, ETH_ALEN, is_mask);
-               attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
-       }
-
-       if (attrs != 0)
-               return -EINVAL;
-
-       return 0;
-}
-
-/**
- * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
- * mask. In case the 'mask' is NULL, the flow is treated as an exact match
- * flow. Otherwise, it is treated as a wildcarded flow, except when the mask
- * does not include any don't-care bits.
- * @match: receives the extracted flow match information.
- * @key: Netlink attribute holding a nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence. The fields should be those of the packet that triggered the
- * creation of this flow.
- * @mask: Optional. Netlink attribute holding a nested %OVS_KEY_ATTR_* Netlink
- * attribute sequence that specifies the mask field of the wildcarded flow.
- */
-int ovs_match_from_nlattrs(struct sw_flow_match *match,
-                          const struct nlattr *key,
-                          const struct nlattr *mask)
-{
-       const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-       const struct nlattr *encap;
-       u64 key_attrs = 0;
-       u64 mask_attrs = 0;
-       bool encap_valid = false;
-       int err;
-
-       err = parse_flow_nlattrs(key, a, &key_attrs);
-       if (err)
-               return err;
-
-       if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
-           (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
-           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
-               __be16 tci;
-
-               if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
-                     (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
-                       OVS_NLERR("Invalid Vlan frame.\n");
-                       return -EINVAL;
-               }
-
-               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-               encap = a[OVS_KEY_ATTR_ENCAP];
-               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
-               encap_valid = true;
-
-               if (tci & htons(VLAN_TAG_PRESENT)) {
-                       err = parse_flow_nlattrs(encap, a, &key_attrs);
-                       if (err)
-                               return err;
-               } else if (!tci) {
-                       /* Corner case for truncated 802.1Q header. */
-                       if (nla_len(encap)) {
-                               OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
-                               return -EINVAL;
-                       }
-               } else {
-                       OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
-                       return  -EINVAL;
-               }
-       }
-
-       err = ovs_key_from_nlattrs(match, key_attrs, a, false);
-       if (err)
-               return err;
-
-       if (mask) {
-               err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
-               if (err)
-                       return err;
-
-               if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP)  {
-                       __be16 eth_type = 0;
-                       __be16 tci = 0;
-
-                       if (!encap_valid) {
-                               OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
-                               return  -EINVAL;
-                       }
-
-                       mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
-                       if (a[OVS_KEY_ATTR_ETHERTYPE])
-                               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
-                       if (eth_type == htons(0xffff)) {
-                               mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
-                               encap = a[OVS_KEY_ATTR_ENCAP];
-                               err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
-                       } else {
-                               OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
-                                               ntohs(eth_type));
-                               return -EINVAL;
-                       }
-
-                       if (a[OVS_KEY_ATTR_VLAN])
-                               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
-                       if (!(tci & htons(VLAN_TAG_PRESENT))) {
-                               OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
-                               return -EINVAL;
-                       }
-               }
-
-               err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
-               if (err)
-                       return err;
-       } else {
-               /* Populate exact match flow's key mask. */
-               if (match->mask)
-                       ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
-       }
-
-       if (!ovs_match_validate(match, key_attrs, mask_attrs))
-               return -EINVAL;
-
-       return 0;
-}
-
-/**
- * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
- * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- *
- * This parses a series of Netlink attributes that form a flow key, which must
- * take the same form accepted by flow_from_nlattrs(), but only enough of it to
- * get the metadata, that is, the parts of the flow key that cannot be
- * extracted from the packet itself.
- */
-
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
-               const struct nlattr *attr)
-{
-       struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
-       const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-       u64 attrs = 0;
-       int err;
-       struct sw_flow_match match;
-
-       flow->key.phy.in_port = DP_MAX_PORTS;
-       flow->key.phy.priority = 0;
-       flow->key.phy.skb_mark = 0;
-       memset(tun_key, 0, sizeof(flow->key.tun_key));
-
-       err = parse_flow_nlattrs(attr, a, &attrs);
-       if (err)
-               return -EINVAL;
-
-       memset(&match, 0, sizeof(match));
-       match.key = &flow->key;
-
-       err = metadata_from_nlattrs(&match, &attrs, a, false);
-       if (err)
-               return err;
-
-       return 0;
-}
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
-               const struct sw_flow_key *output, struct sk_buff *skb)
-{
-       struct ovs_key_ethernet *eth_key;
-       struct nlattr *nla, *encap;
-       bool is_mask = (swkey != output);
-
-       if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
-               goto nla_put_failure;
-
-       if ((swkey->tun_key.ipv4_dst || is_mask) &&
-           ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
-               goto nla_put_failure;
-
-       if (swkey->phy.in_port == DP_MAX_PORTS) {
-               if (is_mask && (output->phy.in_port == 0xffff))
-                       if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
-                               goto nla_put_failure;
-       } else {
-               u16 upper_u16;
-               upper_u16 = !is_mask ? 0 : 0xffff;
-
-               if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
-                               (upper_u16 << 16) | output->phy.in_port))
-                       goto nla_put_failure;
-       }
-
-       if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
-               goto nla_put_failure;
-
-       nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
-       if (!nla)
-               goto nla_put_failure;
-
-       eth_key = nla_data(nla);
-       memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
-       memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
-
-       if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
-               __be16 eth_type;
-               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
-               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
-                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
-                       goto nla_put_failure;
-               encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-               if (!swkey->eth.tci)
-                       goto unencap;
-       } else
-               encap = NULL;
-
-       if (swkey->eth.type == htons(ETH_P_802_2)) {
-               /*
-                * Ethertype 802.2 is represented in the netlink with omitted
-                * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
-                * 0xffff in the mask attribute.  Ethertype can also
-                * be wildcarded.
-                */
-               if (is_mask && output->eth.type)
-                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
-                                               output->eth.type))
-                               goto nla_put_failure;
-               goto unencap;
-       }
-
-       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
-               goto nla_put_failure;
-
-       if (swkey->eth.type == htons(ETH_P_IP)) {
-               struct ovs_key_ipv4 *ipv4_key;
-
-               nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
-               if (!nla)
-                       goto nla_put_failure;
-               ipv4_key = nla_data(nla);
-               ipv4_key->ipv4_src = output->ipv4.addr.src;
-               ipv4_key->ipv4_dst = output->ipv4.addr.dst;
-               ipv4_key->ipv4_proto = output->ip.proto;
-               ipv4_key->ipv4_tos = output->ip.tos;
-               ipv4_key->ipv4_ttl = output->ip.ttl;
-               ipv4_key->ipv4_frag = output->ip.frag;
-       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-               struct ovs_key_ipv6 *ipv6_key;
-
-               nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
-               if (!nla)
-                       goto nla_put_failure;
-               ipv6_key = nla_data(nla);
-               memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
-                               sizeof(ipv6_key->ipv6_src));
-               memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
-                               sizeof(ipv6_key->ipv6_dst));
-               ipv6_key->ipv6_label = output->ipv6.label;
-               ipv6_key->ipv6_proto = output->ip.proto;
-               ipv6_key->ipv6_tclass = output->ip.tos;
-               ipv6_key->ipv6_hlimit = output->ip.ttl;
-               ipv6_key->ipv6_frag = output->ip.frag;
-       } else if (swkey->eth.type == htons(ETH_P_ARP) ||
-                  swkey->eth.type == htons(ETH_P_RARP)) {
-               struct ovs_key_arp *arp_key;
-
-               nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
-               if (!nla)
-                       goto nla_put_failure;
-               arp_key = nla_data(nla);
-               memset(arp_key, 0, sizeof(struct ovs_key_arp));
-               arp_key->arp_sip = output->ipv4.addr.src;
-               arp_key->arp_tip = output->ipv4.addr.dst;
-               arp_key->arp_op = htons(output->ip.proto);
-               memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
-               memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
-       }
-
-       if ((swkey->eth.type == htons(ETH_P_IP) ||
-            swkey->eth.type == htons(ETH_P_IPV6)) &&
-            swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-
-               if (swkey->ip.proto == IPPROTO_TCP) {
-                       struct ovs_key_tcp *tcp_key;
-
-                       nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
-                       if (!nla)
-                               goto nla_put_failure;
-                       tcp_key = nla_data(nla);
-                       if (swkey->eth.type == htons(ETH_P_IP)) {
-                               tcp_key->tcp_src = output->ipv4.tp.src;
-                               tcp_key->tcp_dst = output->ipv4.tp.dst;
-                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-                               tcp_key->tcp_src = output->ipv6.tp.src;
-                               tcp_key->tcp_dst = output->ipv6.tp.dst;
-                       }
-               } else if (swkey->ip.proto == IPPROTO_UDP) {
-                       struct ovs_key_udp *udp_key;
-
-                       nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
-                       if (!nla)
-                               goto nla_put_failure;
-                       udp_key = nla_data(nla);
-                       if (swkey->eth.type == htons(ETH_P_IP)) {
-                               udp_key->udp_src = output->ipv4.tp.src;
-                               udp_key->udp_dst = output->ipv4.tp.dst;
-                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-                               udp_key->udp_src = output->ipv6.tp.src;
-                               udp_key->udp_dst = output->ipv6.tp.dst;
-                       }
-               } else if (swkey->ip.proto == IPPROTO_SCTP) {
-                       struct ovs_key_sctp *sctp_key;
-
-                       nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
-                       if (!nla)
-                               goto nla_put_failure;
-                       sctp_key = nla_data(nla);
-                       if (swkey->eth.type == htons(ETH_P_IP)) {
-                               sctp_key->sctp_src = swkey->ipv4.tp.src;
-                               sctp_key->sctp_dst = swkey->ipv4.tp.dst;
-                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
-                               sctp_key->sctp_src = swkey->ipv6.tp.src;
-                               sctp_key->sctp_dst = swkey->ipv6.tp.dst;
-                       }
-               } else if (swkey->eth.type == htons(ETH_P_IP) &&
-                          swkey->ip.proto == IPPROTO_ICMP) {
-                       struct ovs_key_icmp *icmp_key;
-
-                       nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
-                       if (!nla)
-                               goto nla_put_failure;
-                       icmp_key = nla_data(nla);
-                       icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
-                       icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
-               } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
-                          swkey->ip.proto == IPPROTO_ICMPV6) {
-                       struct ovs_key_icmpv6 *icmpv6_key;
-
-                       nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
-                                               sizeof(*icmpv6_key));
-                       if (!nla)
-                               goto nla_put_failure;
-                       icmpv6_key = nla_data(nla);
-                       icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
-                       icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
-
-                       if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
-                           icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
-                               struct ovs_key_nd *nd_key;
-
-                               nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
-                               if (!nla)
-                                       goto nla_put_failure;
-                               nd_key = nla_data(nla);
-                               memcpy(nd_key->nd_target, &output->ipv6.nd.target,
-                                                       sizeof(nd_key->nd_target));
-                               memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
-                               memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
-                       }
-               }
-       }
-
-unencap:
-       if (encap)
-               nla_nest_end(skb, encap);
-
-       return 0;
-
-nla_put_failure:
-       return -EMSGSIZE;
-}
-
-/* Initializes the flow module.
- * Returns zero if successful or a negative error code. */
-int ovs_flow_init(void)
-{
-       BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
-       BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
-
-       flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
-                                       0, NULL);
-       if (flow_cache == NULL)
-               return -ENOMEM;
-
-       return 0;
-}
-
-/* Uninitializes the flow module. */
-void ovs_flow_exit(void)
-{
-       kmem_cache_destroy(flow_cache);
-}
-
-struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
-{
-       struct sw_flow_mask *mask;
-
-       mask = kmalloc(sizeof(*mask), GFP_KERNEL);
-       if (mask)
-               mask->ref_count = 0;
-
-       return mask;
-}
-
-void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
-{
-       mask->ref_count++;
-}
-
-static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
-{
-       struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);
-
-       kfree(mask);
-}
-
-void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
-{
-       if (!mask)
-               return;
-
-       BUG_ON(!mask->ref_count);
-       mask->ref_count--;
-
-       if (!mask->ref_count) {
-               list_del_rcu(&mask->list);
-               if (deferred)
-                       call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
-               else
-                       kfree(mask);
-       }
-}
-
-static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
-               const struct sw_flow_mask *b)
-{
-       u8 *a_ = (u8 *)&a->key + a->range.start;
-       u8 *b_ = (u8 *)&b->key + b->range.start;
-
-       return  (a->range.end == b->range.end)
-               && (a->range.start == b->range.start)
-               && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
-}
-
-struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
-                                           const struct sw_flow_mask *mask)
-{
-       struct list_head *ml;
-
-       list_for_each(ml, tbl->mask_list) {
-               struct sw_flow_mask *m;
-               m = container_of(ml, struct sw_flow_mask, list);
-               if (ovs_sw_flow_mask_equal(mask, m))
-                       return m;
-       }
-
-       return NULL;
-}
-
-/**
- * add a new mask into the mask list.
- * The caller needs to make sure that 'mask' is not the same
- * as any masks that are already on the list.
- */
-void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
-       list_add_rcu(&mask->list, tbl->mask_list);
-}
-
-/**
- * Set 'range' fields in the mask to the value of 'val'.
- */
-static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
-               struct sw_flow_key_range *range, u8 val)
-{
-       u8 *m = (u8 *)&mask->key + range->start;
-
-       mask->range = *range;
-       memset(m, val, range_n_bytes(range));
-}
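The mask helpers above (find, insert, add_ref/del_ref) let many flows share a single reference-counted sw_flow_mask. A hypothetical caller sketch, not taken from this patch, showing how they might be combined using only the prototypes declared above:

    static struct sw_flow_mask *toy_mask_find_or_insert(struct flow_table *tbl,
                                                        struct sw_flow_mask *new)
    {
            /* Reuse an identical mask if one is already on the list... */
            struct sw_flow_mask *mask = ovs_sw_flow_mask_find(tbl, new);

            /* ...otherwise add the new one to the table's mask list. */
            if (!mask) {
                    ovs_sw_flow_mask_insert(tbl, new);
                    mask = new;
            }
            ovs_sw_flow_mask_add_ref(mask);
            return mask;
    }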
diff --git a/datapath/flow.h b/datapath/flow.h
index 03eae03..91a3022 100644 (file)
--- a/datapath/flow.h
+++ b/datapath/flow.h
 #include <net/ip_tunnels.h>
 
 struct sk_buff;
-struct sw_flow_mask;
-struct flow_table;
-
-struct sw_flow_actions {
-       struct rcu_head rcu;
-       u32 actions_len;
-       struct nlattr actions[];
-};
 
 /* Used to memset ovs_key_ipv4_tunnel padding. */
 #define OVS_TUNNEL_KEY_SIZE                                    \
@@ -129,6 +121,31 @@ struct sw_flow_key {
        };
 } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
 
+struct sw_flow_key_range {
+       size_t start;
+       size_t end;
+};
+
+struct sw_flow_mask {
+       int ref_count;
+       struct rcu_head rcu;
+       struct list_head list;
+       struct sw_flow_key_range range;
+       struct sw_flow_key key;
+};
+
+struct sw_flow_match {
+       struct sw_flow_key *key;
+       struct sw_flow_key_range range;
+       struct sw_flow_mask *mask;
+};
+
+struct sw_flow_actions {
+       struct rcu_head rcu;
+       u32 actions_len;
+       struct nlattr actions[];
+};
+
 struct sw_flow {
        struct rcu_head rcu;
        struct hlist_node hash_node[2];
@@ -146,20 +163,6 @@ struct sw_flow {
        u8 tcp_flags;           /* Union of seen TCP flags. */
 };
 
-struct sw_flow_key_range {
-       size_t start;
-       size_t end;
-};
-
-struct sw_flow_match {
-       struct sw_flow_key *key;
-       struct sw_flow_key_range range;
-       struct sw_flow_mask *mask;
-};
-
-void ovs_match_init(struct sw_flow_match *match,
-               struct sw_flow_key *key, struct sw_flow_mask *mask);
-
 struct arp_eth_header {
        __be16      ar_hrd;     /* format of hardware address   */
        __be16      ar_pro;     /* format of protocol address   */
@@ -174,88 +177,9 @@ struct arp_eth_header {
        unsigned char       ar_tip[4];          /* target IP address        */
 } __packed;
 
-int ovs_flow_init(void);
-void ovs_flow_exit(void);
-
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *, bool deferred);
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
 void ovs_flow_used(struct sw_flow *, struct sk_buff *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
-int ovs_flow_to_nlattrs(const struct sw_flow_key *,
-               const struct sw_flow_key *, struct sk_buff *);
-int ovs_match_from_nlattrs(struct sw_flow_match *match,
-                     const struct nlattr *,
-                     const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
-               const struct nlattr *attr);
 
-#define MAX_ACTIONS_BUFSIZE    (32 * 1024)
-#define TBL_MIN_BUCKETS                1024
-
-struct flow_table {
-       struct flex_array *buckets;
-       unsigned int count, n_buckets;
-       struct rcu_head rcu;
-       struct list_head *mask_list;
-       int node_ver;
-       u32 hash_seed;
-       bool keep_flows;
-};
-
-static inline int ovs_flow_tbl_count(struct flow_table *table)
-{
-       return table->count;
-}
-
-static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
-{
-       return (table->count > table->n_buckets);
-}
-
-struct sw_flow *ovs_flow_lookup(struct flow_table *,
-                               const struct sw_flow_key *);
-struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
-                                   struct sw_flow_match *match);
-
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
-struct flow_table *ovs_flow_tbl_alloc(int new_size);
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
-void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
-
-struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
-extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
-                            struct sw_flow_match *match, bool is_mask);
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
-                          const struct ovs_key_ipv4_tunnel *tun_key,
-                          const struct ovs_key_ipv4_tunnel *output);
-
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-               const struct sw_flow_key *key, int key_end);
-
-struct sw_flow_mask {
-       int ref_count;
-       struct rcu_head rcu;
-       struct list_head list;
-       struct sw_flow_key_range range;
-       struct sw_flow_key key;
-};
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
 
-struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
-void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
-void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
-void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
-struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
-               const struct sw_flow_mask *);
-void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
-                      const struct sw_flow_mask *mask);
 #endif /* flow.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
new file mode 100644 (file)
index 0000000..515a9f6
--- /dev/null
+++ b/datapath/flow_netlink.c
@@ -0,0 +1,1603 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#include "flow_netlink.h"
+
+static void update_range__(struct sw_flow_match *match,
+                          size_t offset, size_t size, bool is_mask)
+{
+       struct sw_flow_key_range *range = NULL;
+       size_t start = rounddown(offset, sizeof(long));
+       size_t end = roundup(offset + size, sizeof(long));
+
+       if (!is_mask)
+               range = &match->range;
+       else if (match->mask)
+               range = &match->mask->range;
+
+       if (!range)
+               return;
+
+       if (range->start == range->end) {
+               range->start = start;
+               range->end = end;
+               return;
+       }
+
+       if (range->start > start)
+               range->start = start;
+
+       if (range->end < end)
+               range->end = end;
+}
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+       do { \
+               update_range__(match, offsetof(struct sw_flow_key, field),  \
+                                    sizeof((match)->key->field), is_mask); \
+               if (is_mask) {                                              \
+                       if ((match)->mask)                                  \
+                               (match)->mask->key.field = value;           \
+               } else {                                                    \
+                       (match)->key->field = value;                        \
+               }                                                           \
+       } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+       do { \
+               update_range__(match, offsetof(struct sw_flow_key, field),  \
+                               len, is_mask);                              \
+               if (is_mask) {                                              \
+                       if ((match)->mask)                                  \
+                               memcpy(&(match)->mask->key.field, value_p, len);\
+               } else {                                                    \
+                       memcpy(&(match)->key->field, value_p, len);         \
+               }                                                           \
+       } while (0)
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+       return range->end - range->start;
+}
+
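update_range__() and the SW_FLOW_KEY_PUT/SW_FLOW_KEY_MEMCPY macros above grow the match range to cover every key field that gets set, rounding the offsets out to long boundaries so that later masking, hashing and comparison can run long-at-a-time. A stand-alone illustration of that rounding (the field offset and size are example values only):

    #include <stddef.h>
    #include <stdio.h>

    #define ROUNDDOWN(x, y) ((x) / (y) * (y))
    #define ROUNDUP(x, y)   ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
            size_t offset = 6, size = 2;  /* e.g. a __be16 field at offset 6 */
            size_t start = ROUNDDOWN(offset, sizeof(long));
            size_t end = ROUNDUP(offset + size, sizeof(long));

            /* With 8-byte longs this prints "range [0, 8)". */
            printf("range [%zu, %zu)\n", start, end);
            return 0;
    }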
+static bool match_validate(const struct sw_flow_match *match,
+                          u64 key_attrs, u64 mask_attrs)
+{
+       u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+       u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
+
+       /* The following mask attributes are allowed only if they
+        * pass the validation tests. */
+       mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
+                       | (1ULL << OVS_KEY_ATTR_IPV6)
+                       | (1ULL << OVS_KEY_ATTR_TCP)
+                       | (1ULL << OVS_KEY_ATTR_UDP)
+                       | (1ULL << OVS_KEY_ATTR_SCTP)
+                       | (1ULL << OVS_KEY_ATTR_ICMP)
+                       | (1ULL << OVS_KEY_ATTR_ICMPV6)
+                       | (1ULL << OVS_KEY_ATTR_ARP)
+                       | (1ULL << OVS_KEY_ATTR_ND));
+
+       /* Always allowed mask fields. */
+       mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
+                      | (1ULL << OVS_KEY_ATTR_IN_PORT)
+                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
+
+       /* Check key attributes. */
+       if (match->key->eth.type == htons(ETH_P_ARP)
+                       || match->key->eth.type == htons(ETH_P_RARP)) {
+               key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
+               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
+       }
+
+       if (match->key->eth.type == htons(ETH_P_IP)) {
+               key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
+               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+                       mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
+
+               if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+                       if (match->key->ip.proto == IPPROTO_UDP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_SCTP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_TCP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_ICMP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
+                       }
+               }
+       }
+
+       if (match->key->eth.type == htons(ETH_P_IPV6)) {
+               key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
+               if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+                       mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
+
+               if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+                       if (match->key->ip.proto == IPPROTO_UDP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_SCTP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_TCP) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
+                       }
+
+                       if (match->key->ip.proto == IPPROTO_ICMPV6) {
+                               key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
+                               if (match->mask && (match->mask->key.ip.proto == 0xff))
+                                       mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
+
+                               if (match->key->ipv6.tp.src ==
+                                               htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+                                   match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+                                       key_expected |= 1ULL << OVS_KEY_ATTR_ND;
+                                       if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+                                               mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
+                               }
+                       }
+               }
+       }
+
+       if ((key_attrs & key_expected) != key_expected) {
+               /* Key attributes check failed. */
+               OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+                               key_attrs, key_expected);
+               return false;
+       }
+
+       if ((mask_attrs & mask_allowed) != mask_attrs) {
+               /* Mask attributes check failed. */
+               OVS_NLERR("Mask contains fields that are not allowed (mask_attrs=%llx, mask_allowed=%llx).\n",
+                               mask_attrs, mask_allowed);
+               return false;
+       }
+
+       return true;
+}
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
+static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+       [OVS_KEY_ATTR_ENCAP] = -1,
+       [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+       [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+       [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
+       [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+       [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+       [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+       [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+       [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+       [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+       [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+       [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
+       [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+       [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+       [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+       [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+       [OVS_KEY_ATTR_TUNNEL] = -1,
+};
+
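+/* Returns true if the first 'size' bytes at 'fp' are all zero, false
+ * otherwise or if 'fp' is null. */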
+static bool is_all_zero(const u8 *fp, size_t size)
+{
+       int i;
+
+       if (!fp)
+               return false;
+
+       for (i = 0; i < size; i++)
+               if (fp[i])
+                       return false;
+
+       return true;
+}
+
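+/* Walks the nested attributes in 'attr', storing each one in 'a' indexed by
+ * type and setting the corresponding bit in '*attrsp'.  When 'nz' is true
+ * (mask parsing), attributes whose payload is all zeroes are skipped. */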
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+                               const struct nlattr *a[],
+                               u64 *attrsp, bool nz)
+{
+       const struct nlattr *nla;
+       u64 attrs;
+       int rem;
+
+       attrs = *attrsp;
+       nla_for_each_nested(nla, attr, rem) {
+               u16 type = nla_type(nla);
+               int expected_len;
+
+               if (type > OVS_KEY_ATTR_MAX) {
+                       OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+                                 type, OVS_KEY_ATTR_MAX);
+                       return -EINVAL;
+               }
+
+               if (attrs & (1ULL << type)) {
+                       OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+                       return -EINVAL;
+               }
+
+               expected_len = ovs_key_lens[type];
+               if (nla_len(nla) != expected_len && expected_len != -1) {
+                       OVS_NLERR("Key attribute has unexpected length (type=%d"
+                                 ", length=%d, expected=%d).\n", type,
+                                 nla_len(nla), expected_len);
+                       return -EINVAL;
+               }
+
+               if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+                       attrs |= 1ULL << type;
+                       a[type] = nla;
+               }
+       }
+       if (rem) {
+               OVS_NLERR("Message has %d unknown bytes.\n", rem);
+               return -EINVAL;
+       }
+
+       *attrsp = attrs;
+       return 0;
+}
+
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+                                  const struct nlattr *a[], u64 *attrsp)
+{
+       return __parse_flow_nlattrs(attr, a, attrsp, true);
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+                             const struct nlattr *a[], u64 *attrsp)
+{
+       return __parse_flow_nlattrs(attr, a, attrsp, false);
+}
+
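+/* Parses the nested OVS_TUNNEL_KEY_ATTR_* attributes in 'attr' into the
+ * tunnel key portion of 'match'.  For a key (!is_mask), the tunnel
+ * destination address and TTL must be present. */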
+static int ipv4_tun_from_nlattr(const struct nlattr *attr,
+                               struct sw_flow_match *match, bool is_mask)
+{
+       struct nlattr *a;
+       int rem;
+       bool ttl = false;
+       __be16 tun_flags = 0;
+
+       nla_for_each_nested(a, attr, rem) {
+               int type = nla_type(a);
+               static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+                       [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
+                       [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
+                       [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
+                       [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
+                       [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
+                       [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
+                       [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+               };
+
+               if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+                       OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+                                 type, OVS_TUNNEL_KEY_ATTR_MAX);
+                       return -EINVAL;
+               }
+
+               if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+                       OVS_NLERR("IPv4 tunnel attribute has unexpected "
+                                 "length (type=%d, length=%d, expected=%d).\n",
+                                 type, nla_len(a), ovs_tunnel_key_lens[type]);
+                       return -EINVAL;
+               }
+
+               switch (type) {
+               case OVS_TUNNEL_KEY_ATTR_ID:
+                       SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+                                       nla_get_be64(a), is_mask);
+                       tun_flags |= TUNNEL_KEY;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+                                       nla_get_be32(a), is_mask);
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+                                       nla_get_be32(a), is_mask);
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_TOS:
+                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+                                       nla_get_u8(a), is_mask);
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_TTL:
+                       SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+                                       nla_get_u8(a), is_mask);
+                       ttl = true;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+                       tun_flags |= TUNNEL_DONT_FRAGMENT;
+                       break;
+               case OVS_TUNNEL_KEY_ATTR_CSUM:
+                       tun_flags |= TUNNEL_CSUM;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+
+       if (rem > 0) {
+               OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+               return -EINVAL;
+       }
+
+       if (!is_mask) {
+               if (!match->key->tun_key.ipv4_dst) {
+                       OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+                       return -EINVAL;
+               }
+
+               if (!ttl) {
+                       OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
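+/* Serializes 'output' as a nested OVS_KEY_ATTR_TUNNEL attribute on 'skb'.
+ * Returns 0 on success or -EMSGSIZE if 'skb' lacks room. */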
+static int ipv4_tun_to_nlattr(struct sk_buff *skb,
+                             const struct ovs_key_ipv4_tunnel *tun_key,
+                             const struct ovs_key_ipv4_tunnel *output)
+{
+       struct nlattr *nla;
+
+       nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+       if (!nla)
+               return -EMSGSIZE;
+
+       if (output->tun_flags & TUNNEL_KEY &&
+           nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
+               return -EMSGSIZE;
+       if (output->ipv4_src &&
+               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+               return -EMSGSIZE;
+       if (output->ipv4_dst &&
+               nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+               return -EMSGSIZE;
+       if (output->ipv4_tos &&
+               nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+               return -EMSGSIZE;
+       if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+               return -EMSGSIZE;
+       if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+               nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+               return -EMSGSIZE;
+       if ((output->tun_flags & TUNNEL_CSUM) &&
+               nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+               return -EMSGSIZE;
+
+       nla_nest_end(skb, nla);
+       return 0;
+}
+
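+/* Extracts the metadata attributes (priority, in_port, skb_mark and tunnel)
+ * from 'a' into 'match', clearing the corresponding bits in '*attrs'. */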
+static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
+                                const struct nlattr **a, bool is_mask)
+{
+       if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
+               SW_FLOW_KEY_PUT(match, phy.priority,
+                         nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
+       }
+
+       if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
+               u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+               if (is_mask)
+                       in_port = 0xffffffff; /* Always exact match in_port. */
+               else if (in_port >= DP_MAX_PORTS)
+                       return -EINVAL;
+
+               SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
+       } else if (!is_mask) {
+               SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
+       }
+
+       if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
+               uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+               SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
+       }
+       if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
+               if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+                                        is_mask))
+                       return -EINVAL;
+               *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
+       }
+       return 0;
+}
+
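+/* Copies the packet header attributes indicated by 'attrs' from 'a' into
+ * 'match'.  Returns -EINVAL if any attribute bit is left unconsumed. */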
+static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
+                               const struct nlattr **a, bool is_mask)
+{
+       int err;
+       u64 orig_attrs = attrs;
+
+       err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+       if (err)
+               return err;
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
+               const struct ovs_key_ethernet *eth_key;
+
+               eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+               SW_FLOW_KEY_MEMCPY(match, eth.src,
+                               eth_key->eth_src, ETH_ALEN, is_mask);
+               SW_FLOW_KEY_MEMCPY(match, eth.dst,
+                               eth_key->eth_dst, ETH_ALEN, is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
+               __be16 tci;
+
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+               if (!(tci & htons(VLAN_TAG_PRESENT))) {
+                       if (is_mask)
+                               OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+                       else
+                               OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+
+                       return -EINVAL;
+               }
+
+               SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
+       } else if (!is_mask)
+               SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
+               __be16 eth_type;
+
+               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+               if (is_mask) {
+                       /* Always exact match EtherType. */
+                       eth_type = htons(0xffff);
+               } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+                       OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+                                       ntohs(eth_type), ETH_P_802_3_MIN);
+                       return -EINVAL;
+               }
+
+               SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+       } else if (!is_mask) {
+               SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+               const struct ovs_key_ipv4 *ipv4_key;
+
+               ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+               if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+                       OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+                               ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+                       return -EINVAL;
+               }
+               SW_FLOW_KEY_PUT(match, ip.proto,
+                               ipv4_key->ipv4_proto, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.tos,
+                               ipv4_key->ipv4_tos, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.ttl,
+                               ipv4_key->ipv4_ttl, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.frag,
+                               ipv4_key->ipv4_frag, is_mask);
+               SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+                               ipv4_key->ipv4_src, is_mask);
+               SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+                               ipv4_key->ipv4_dst, is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
+               const struct ovs_key_ipv6 *ipv6_key;
+
+               ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+               if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+                       OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+                               ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+                       return -EINVAL;
+               }
+               SW_FLOW_KEY_PUT(match, ipv6.label,
+                               ipv6_key->ipv6_label, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.proto,
+                               ipv6_key->ipv6_proto, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.tos,
+                               ipv6_key->ipv6_tclass, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.ttl,
+                               ipv6_key->ipv6_hlimit, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.frag,
+                               ipv6_key->ipv6_frag, is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+                               ipv6_key->ipv6_src,
+                               sizeof(match->key->ipv6.addr.src),
+                               is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+                               ipv6_key->ipv6_dst,
+                               sizeof(match->key->ipv6.addr.dst),
+                               is_mask);
+
+               attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
+               const struct ovs_key_arp *arp_key;
+
+               arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+               if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+                       OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+                                 arp_key->arp_op);
+                       return -EINVAL;
+               }
+
+               SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+                               arp_key->arp_sip, is_mask);
+               SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+                               arp_key->arp_tip, is_mask);
+               SW_FLOW_KEY_PUT(match, ip.proto,
+                               ntohs(arp_key->arp_op), is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+                               arp_key->arp_sha, ETH_ALEN, is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+                               arp_key->arp_tha, ETH_ALEN, is_mask);
+
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
+               const struct ovs_key_tcp *tcp_key;
+
+               tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+                                       tcp_key->tcp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+                                       tcp_key->tcp_dst, is_mask);
+               } else {
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+                                       tcp_key->tcp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+                                       tcp_key->tcp_dst, is_mask);
+               }
+               attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
+               const struct ovs_key_udp *udp_key;
+
+               udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+                                       udp_key->udp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+                                       udp_key->udp_dst, is_mask);
+               } else {
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+                                       udp_key->udp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+                                       udp_key->udp_dst, is_mask);
+               }
+               attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
+               const struct ovs_key_sctp *sctp_key;
+
+               sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+               if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+                                       sctp_key->sctp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+                                       sctp_key->sctp_dst, is_mask);
+               } else {
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+                                       sctp_key->sctp_src, is_mask);
+                       SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+                                       sctp_key->sctp_dst, is_mask);
+               }
+               attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
+               const struct ovs_key_icmp *icmp_key;
+
+               icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+               SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+                               htons(icmp_key->icmp_type), is_mask);
+               SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+                               htons(icmp_key->icmp_code), is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
+               const struct ovs_key_icmpv6 *icmpv6_key;
+
+               icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+               SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+                               htons(icmpv6_key->icmpv6_type), is_mask);
+               SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+                               htons(icmpv6_key->icmpv6_code), is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
+       }
+
+       if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
+               const struct ovs_key_nd *nd_key;
+
+               nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+                       nd_key->nd_target,
+                       sizeof(match->key->ipv6.nd.target),
+                       is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+                       nd_key->nd_sll, ETH_ALEN, is_mask);
+               SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+                               nd_key->nd_tll, ETH_ALEN, is_mask);
+               attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
+       }
+
+       if (attrs != 0)
+               return -EINVAL;
+
+       return 0;
+}
+
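+/* Fills the bytes of 'mask->key' covered by 'range' with 'val' and records
+ * 'range' as the mask's valid extent. */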
+static void sw_flow_mask_set(struct sw_flow_mask *mask,
+                            struct sw_flow_key_range *range, u8 val)
+{
+       u8 *m = (u8 *)&mask->key + range->start;
+
+       mask->range = *range;
+       memset(m, val, range_n_bytes(range));
+}
+
+/**
+ * ovs_nla_get_match - parses Netlink attributes into a flow key and
+ * mask.  If 'mask' is NULL, the flow is treated as an exact-match
+ * flow.  Otherwise, it is treated as a wildcarded flow, except when the
+ * mask does not include any don't-care bits.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.  These should be the fields of the packet that triggered the
+ * creation of this flow.
+ * @mask: Optional.  Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attributes that specify the mask of the wildcarded flow.
+ */
+int ovs_nla_get_match(struct sw_flow_match *match,
+                     const struct nlattr *key,
+                     const struct nlattr *mask)
+{
+       const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+       const struct nlattr *encap;
+       u64 key_attrs = 0;
+       u64 mask_attrs = 0;
+       bool encap_valid = false;
+       int err;
+
+       err = parse_flow_nlattrs(key, a, &key_attrs);
+       if (err)
+               return err;
+
+       if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+           (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
+           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+               __be16 tci;
+
+               if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
+                     (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
+                       OVS_NLERR("Invalid VLAN frame.\n");
+                       return -EINVAL;
+               }
+
+               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+               encap = a[OVS_KEY_ATTR_ENCAP];
+               key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
+               encap_valid = true;
+
+               if (tci & htons(VLAN_TAG_PRESENT)) {
+                       err = parse_flow_nlattrs(encap, a, &key_attrs);
+                       if (err)
+                               return err;
+               } else if (!tci) {
+                       /* Corner case for truncated 802.1Q header. */
+                       if (nla_len(encap)) {
+                               OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+                               return -EINVAL;
+                       }
+               } else {
+                       OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+                       return -EINVAL;
+               }
+       }
+
+       err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+       if (err)
+               return err;
+
+       if (mask) {
+               err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+               if (err)
+                       return err;
+
+               if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP)  {
+                       __be16 eth_type = 0;
+                       __be16 tci = 0;
+
+                       if (!encap_valid) {
+                               OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+                               return -EINVAL;
+                       }
+
+                       mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
+                       if (a[OVS_KEY_ATTR_ETHERTYPE])
+                               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+                       if (eth_type == htons(0xffff)) {
+                               mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
+                               encap = a[OVS_KEY_ATTR_ENCAP];
+                               err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+                       } else {
+                               OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+                                               ntohs(eth_type));
+                               return -EINVAL;
+                       }
+
+                       if (a[OVS_KEY_ATTR_VLAN])
+                               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+                       if (!(tci & htons(VLAN_TAG_PRESENT))) {
+                               OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+                               return -EINVAL;
+                       }
+               }
+
+               err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+               if (err)
+                       return err;
+       } else {
+               /* Populate exact match flow's key mask. */
+               if (match->mask)
+                       sw_flow_mask_set(match->mask, &match->range, 0xff);
+       }
+
+       if (!match_validate(match, key_attrs, mask_attrs))
+               return -EINVAL;
+
+       return 0;
+}
+
+/**
+ * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
+ * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+                             const struct nlattr *attr)
+{
+       struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
+       const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+       u64 attrs = 0;
+       int err;
+       struct sw_flow_match match;
+
+       flow->key.phy.in_port = DP_MAX_PORTS;
+       flow->key.phy.priority = 0;
+       flow->key.phy.skb_mark = 0;
+       memset(tun_key, 0, sizeof(flow->key.tun_key));
+
+       err = parse_flow_nlattrs(attr, a, &attrs);
+       if (err)
+               return -EINVAL;
+
+       memset(&match, 0, sizeof(match));
+       match.key = &flow->key;
+
+       err = metadata_from_nlattrs(&match, &attrs, a, false);
+       if (err)
+               return err;
+
+       return 0;
+}
+
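+/* Serializes 'output' as nested OVS_KEY_ATTR_* attributes on 'skb'.  'swkey'
+ * selects which attributes are emitted; when 'swkey' differs from 'output',
+ * the function is writing a mask rather than a key. */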
+int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+                    const struct sw_flow_key *output, struct sk_buff *skb)
+{
+       struct ovs_key_ethernet *eth_key;
+       struct nlattr *nla, *encap;
+       bool is_mask = (swkey != output);
+
+       if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+               goto nla_put_failure;
+
+       if ((swkey->tun_key.ipv4_dst || is_mask) &&
+           ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+               goto nla_put_failure;
+
+       if (swkey->phy.in_port == DP_MAX_PORTS) {
+               if (is_mask && (output->phy.in_port == 0xffff))
+                       if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+                               goto nla_put_failure;
+       } else {
+               u16 upper_u16;
+               upper_u16 = !is_mask ? 0 : 0xffff;
+
+               if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+                               (upper_u16 << 16) | output->phy.in_port))
+                       goto nla_put_failure;
+       }
+
+       if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
+               goto nla_put_failure;
+
+       nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+       if (!nla)
+               goto nla_put_failure;
+
+       eth_key = nla_data(nla);
+       memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+       memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
+
+       if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+               __be16 eth_type;
+               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+                       goto nla_put_failure;
+               encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+               if (!swkey->eth.tci)
+                       goto unencap;
+       } else
+               encap = NULL;
+
+       if (swkey->eth.type == htons(ETH_P_802_2)) {
+               /*
+                * Ethertype 802.2 is represented in Netlink by omitting
+                * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and
+                * putting 0xffff in the mask attribute.  The Ethertype
+                * can also be wildcarded.
+                */
+               if (is_mask && output->eth.type)
+                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+                                               output->eth.type))
+                               goto nla_put_failure;
+               goto unencap;
+       }
+
+       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+               goto nla_put_failure;
+
+       if (swkey->eth.type == htons(ETH_P_IP)) {
+               struct ovs_key_ipv4 *ipv4_key;
+
+               nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+               if (!nla)
+                       goto nla_put_failure;
+               ipv4_key = nla_data(nla);
+               ipv4_key->ipv4_src = output->ipv4.addr.src;
+               ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+               ipv4_key->ipv4_proto = output->ip.proto;
+               ipv4_key->ipv4_tos = output->ip.tos;
+               ipv4_key->ipv4_ttl = output->ip.ttl;
+               ipv4_key->ipv4_frag = output->ip.frag;
+       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+               struct ovs_key_ipv6 *ipv6_key;
+
+               nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+               if (!nla)
+                       goto nla_put_failure;
+               ipv6_key = nla_data(nla);
+               memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
+                               sizeof(ipv6_key->ipv6_src));
+               memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
+                               sizeof(ipv6_key->ipv6_dst));
+               ipv6_key->ipv6_label = output->ipv6.label;
+               ipv6_key->ipv6_proto = output->ip.proto;
+               ipv6_key->ipv6_tclass = output->ip.tos;
+               ipv6_key->ipv6_hlimit = output->ip.ttl;
+               ipv6_key->ipv6_frag = output->ip.frag;
+       } else if (swkey->eth.type == htons(ETH_P_ARP) ||
+                  swkey->eth.type == htons(ETH_P_RARP)) {
+               struct ovs_key_arp *arp_key;
+
+               nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+               if (!nla)
+                       goto nla_put_failure;
+               arp_key = nla_data(nla);
+               memset(arp_key, 0, sizeof(struct ovs_key_arp));
+               arp_key->arp_sip = output->ipv4.addr.src;
+               arp_key->arp_tip = output->ipv4.addr.dst;
+               arp_key->arp_op = htons(output->ip.proto);
+               memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
+               memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+       }
+
+       if ((swkey->eth.type == htons(ETH_P_IP) ||
+            swkey->eth.type == htons(ETH_P_IPV6)) &&
+            swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+               if (swkey->ip.proto == IPPROTO_TCP) {
+                       struct ovs_key_tcp *tcp_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       tcp_key = nla_data(nla);
+                       if (swkey->eth.type == htons(ETH_P_IP)) {
+                               tcp_key->tcp_src = output->ipv4.tp.src;
+                               tcp_key->tcp_dst = output->ipv4.tp.dst;
+                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+                               tcp_key->tcp_src = output->ipv6.tp.src;
+                               tcp_key->tcp_dst = output->ipv6.tp.dst;
+                       }
+               } else if (swkey->ip.proto == IPPROTO_UDP) {
+                       struct ovs_key_udp *udp_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       udp_key = nla_data(nla);
+                       if (swkey->eth.type == htons(ETH_P_IP)) {
+                               udp_key->udp_src = output->ipv4.tp.src;
+                               udp_key->udp_dst = output->ipv4.tp.dst;
+                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+                               udp_key->udp_src = output->ipv6.tp.src;
+                               udp_key->udp_dst = output->ipv6.tp.dst;
+                       }
+               } else if (swkey->ip.proto == IPPROTO_SCTP) {
+                       struct ovs_key_sctp *sctp_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       sctp_key = nla_data(nla);
+                       if (swkey->eth.type == htons(ETH_P_IP)) {
+                               sctp_key->sctp_src = output->ipv4.tp.src;
+                               sctp_key->sctp_dst = output->ipv4.tp.dst;
+                       } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+                               sctp_key->sctp_src = output->ipv6.tp.src;
+                               sctp_key->sctp_dst = output->ipv6.tp.dst;
+                       }
+               } else if (swkey->eth.type == htons(ETH_P_IP) &&
+                          swkey->ip.proto == IPPROTO_ICMP) {
+                       struct ovs_key_icmp *icmp_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       icmp_key = nla_data(nla);
+                       icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
+                       icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+               } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+                          swkey->ip.proto == IPPROTO_ICMPV6) {
+                       struct ovs_key_icmpv6 *icmpv6_key;
+
+                       nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+                                               sizeof(*icmpv6_key));
+                       if (!nla)
+                               goto nla_put_failure;
+                       icmpv6_key = nla_data(nla);
+                       icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
+                       icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+
+                       if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+                           icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+                               struct ovs_key_nd *nd_key;
+
+                               nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+                               if (!nla)
+                                       goto nla_put_failure;
+                               nd_key = nla_data(nla);
+                               memcpy(nd_key->nd_target, &output->ipv6.nd.target,
+                                                       sizeof(nd_key->nd_target));
+                               memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
+                               memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
+                       }
+               }
+       }
+
+unencap:
+       if (encap)
+               nla_nest_end(skb, encap);
+
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
+#define MAX_ACTIONS_BUFSIZE    (32 * 1024)
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+{
+       struct sw_flow_actions *sfa;
+
+       if (size > MAX_ACTIONS_BUFSIZE)
+               return ERR_PTR(-EINVAL);
+
+       sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
+       if (!sfa)
+               return ERR_PTR(-ENOMEM);
+
+       sfa->actions_len = 0;
+       return sfa;
+}
+
+/* RCU callback used by ovs_nla_free_flow_actions. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+       struct sw_flow_actions *sf_acts = container_of(rcu,
+                       struct sw_flow_actions, rcu);
+       kfree(sf_acts);
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+       call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+}
+
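+/* Reserves 'attr_len' bytes (Netlink-aligned) at the tail of '*sfa', growing
+ * the buffer up to MAX_ACTIONS_BUFSIZE if needed, and returns a pointer to
+ * the reserved space or an ERR_PTR() on failure. */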
+static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+                                      int attr_len)
+{
+       struct sw_flow_actions *acts;
+       int new_acts_size;
+       int req_size = NLA_ALIGN(attr_len);
+       int next_offset = offsetof(struct sw_flow_actions, actions) +
+                                       (*sfa)->actions_len;
+
+       if (req_size <= (ksize(*sfa) - next_offset))
+               goto out;
+
+       new_acts_size = ksize(*sfa) * 2;
+
+       if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+               if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+                       return ERR_PTR(-EMSGSIZE);
+               new_acts_size = MAX_ACTIONS_BUFSIZE;
+       }
+
+       acts = ovs_nla_alloc_flow_actions(new_acts_size);
+       if (IS_ERR(acts))
+               return (void *)acts;
+
+       memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
+       acts->actions_len = (*sfa)->actions_len;
+       kfree(*sfa);
+       *sfa = acts;
+
+out:
+       (*sfa)->actions_len += req_size;
+       return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
+}
+
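+/* Appends an attribute of type 'attrtype' carrying 'len' bytes of 'data'
+ * (zero-padded to Netlink alignment) to '*sfa'. */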
+static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+{
+       struct nlattr *a;
+
+       a = reserve_sfa_size(sfa, nla_attr_size(len));
+       if (IS_ERR(a))
+               return PTR_ERR(a);
+
+       a->nla_type = attrtype;
+       a->nla_len = nla_attr_size(len);
+
+       if (data)
+               memcpy(nla_data(a), data, len);
+       memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+
+       return 0;
+}
+
+static inline int add_nested_action_start(struct sw_flow_actions **sfa,
+                                         int attrtype)
+{
+       int used = (*sfa)->actions_len;
+       int err;
+
+       err = add_action(sfa, attrtype, NULL, 0);
+       if (err)
+               return err;
+
+       return used;
+}
+
+static inline void add_nested_action_end(struct sw_flow_actions *sfa,
+                                        int st_offset)
+{
+       struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
+                                                              st_offset);
+
+       a->nla_len = sfa->actions_len - st_offset;
+}
+
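+/* Validates an OVS_ACTION_ATTR_SAMPLE attribute and copies its probability
+ * and recursively validated nested actions into '*sfa'. */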
+static int validate_and_copy_sample(const struct nlattr *attr,
+                                   const struct sw_flow_key *key, int depth,
+                                   struct sw_flow_actions **sfa)
+{
+       const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+       const struct nlattr *probability, *actions;
+       const struct nlattr *a;
+       int rem, start, err, st_acts;
+
+       memset(attrs, 0, sizeof(attrs));
+       nla_for_each_nested(a, attr, rem) {
+               int type = nla_type(a);
+               if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+                       return -EINVAL;
+               attrs[type] = a;
+       }
+       if (rem)
+               return -EINVAL;
+
+       probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+       if (!probability || nla_len(probability) != sizeof(u32))
+               return -EINVAL;
+
+       actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+       if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+               return -EINVAL;
+
+       /* validation done, copy sample action. */
+       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+       if (start < 0)
+               return start;
+       err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+                        nla_data(probability), sizeof(u32));
+       if (err)
+               return err;
+       st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+       if (st_acts < 0)
+               return st_acts;
+
+       err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+       if (err)
+               return err;
+
+       add_nested_action_end(*sfa, st_acts);
+       add_nested_action_end(*sfa, start);
+
+       return 0;
+}
+
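+/* Returns 0 if 'flow_key' includes a nonzero transport port for its L3
+ * protocol, -EINVAL otherwise. */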
+static int validate_tp_port(const struct sw_flow_key *flow_key)
+{
+       if (flow_key->eth.type == htons(ETH_P_IP)) {
+               if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
+                       return 0;
+       } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
+               if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
+                       return 0;
+       }
+
+       return -EINVAL;
+}
+
+void ovs_match_init(struct sw_flow_match *match,
+                   struct sw_flow_key *key,
+                   struct sw_flow_mask *mask)
+{
+       memset(match, 0, sizeof(*match));
+       match->key = key;
+       match->mask = mask;
+
+       memset(key, 0, sizeof(*key));
+
+       if (mask) {
+               memset(&mask->key, 0, sizeof(mask->key));
+               mask->range.start = mask->range.end = 0;
+       }
+}
+
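+/* Validates a tunnel set action and copies the parsed tunnel key into '*sfa'
+ * as a nested OVS_KEY_ATTR_IPV4_TUNNEL set action. */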
+static int validate_and_copy_set_tun(const struct nlattr *attr,
+                                    struct sw_flow_actions **sfa)
+{
+       struct sw_flow_match match;
+       struct sw_flow_key key;
+       int err, start;
+
+       ovs_match_init(&match, &key, NULL);
+       err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+       if (err)
+               return err;
+
+       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
+       if (start < 0)
+               return start;
+
+       err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+                       sizeof(match.key->tun_key));
+       add_nested_action_end(*sfa, start);
+
+       return err;
+}
+
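+/* Validates an OVS_ACTION_ATTR_SET action against 'flow_key'.  Tunnel sets
+ * are copied into '*sfa' here and '*set_tun' is set to true so the caller
+ * skips the generic copy. */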
+static int validate_set(const struct nlattr *a,
+                       const struct sw_flow_key *flow_key,
+                       struct sw_flow_actions **sfa,
+                       bool *set_tun)
+{
+       const struct nlattr *ovs_key = nla_data(a);
+       int key_type = nla_type(ovs_key);
+
+       /* There can be only one key in a set action. */
+       if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+               return -EINVAL;
+
+       if (key_type > OVS_KEY_ATTR_MAX ||
+           (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+            ovs_key_lens[key_type] != -1))
+               return -EINVAL;
+
+       switch (key_type) {
+       const struct ovs_key_ipv4 *ipv4_key;
+       const struct ovs_key_ipv6 *ipv6_key;
+       int err;
+
+       case OVS_KEY_ATTR_PRIORITY:
+       case OVS_KEY_ATTR_SKB_MARK:
+       case OVS_KEY_ATTR_ETHERNET:
+               break;
+
+       case OVS_KEY_ATTR_TUNNEL:
+               *set_tun = true;
+               err = validate_and_copy_set_tun(a, sfa);
+               if (err)
+                       return err;
+               break;
+
+       case OVS_KEY_ATTR_IPV4:
+               if (flow_key->eth.type != htons(ETH_P_IP))
+                       return -EINVAL;
+
+               if (!flow_key->ip.proto)
+                       return -EINVAL;
+
+               ipv4_key = nla_data(ovs_key);
+               if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+                       return -EINVAL;
+
+               if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+                       return -EINVAL;
+
+               break;
+
+       case OVS_KEY_ATTR_IPV6:
+               if (flow_key->eth.type != htons(ETH_P_IPV6))
+                       return -EINVAL;
+
+               if (!flow_key->ip.proto)
+                       return -EINVAL;
+
+               ipv6_key = nla_data(ovs_key);
+               if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+                       return -EINVAL;
+
+               if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+                       return -EINVAL;
+
+               if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+                       return -EINVAL;
+
+               break;
+
+       case OVS_KEY_ATTR_TCP:
+               if (flow_key->ip.proto != IPPROTO_TCP)
+                       return -EINVAL;
+
+               return validate_tp_port(flow_key);
+
+       case OVS_KEY_ATTR_UDP:
+               if (flow_key->ip.proto != IPPROTO_UDP)
+                       return -EINVAL;
+
+               return validate_tp_port(flow_key);
+
+       case OVS_KEY_ATTR_SCTP:
+               if (flow_key->ip.proto != IPPROTO_SCTP)
+                       return -EINVAL;
+
+               return validate_tp_port(flow_key);
+
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
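+/* Validates an OVS_ACTION_ATTR_USERSPACE action, which must carry a nonzero
+ * Netlink PID. */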
+static int validate_userspace(const struct nlattr *attr)
+{
+       static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
+               [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+               [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+       };
+       struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+       int error;
+
+       error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+                                attr, userspace_policy);
+       if (error)
+               return error;
+
+       if (!a[OVS_USERSPACE_ATTR_PID] ||
+           !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int copy_action(const struct nlattr *from,
+                      struct sw_flow_actions **sfa)
+{
+       int totlen = NLA_ALIGN(from->nla_len);
+       struct nlattr *to;
+
+       to = reserve_sfa_size(sfa, from->nla_len);
+       if (IS_ERR(to))
+               return PTR_ERR(to);
+
+       memcpy(to, from, totlen);
+       return 0;
+}
+
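+/* Validates the nested action list in 'attr' against 'key' and copies it
+ * into '*sfa'.  'depth' bounds sample action nesting at SAMPLE_ACTION_DEPTH. */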
+int ovs_nla_copy_actions(const struct nlattr *attr,
+                        const struct sw_flow_key *key,
+                        int depth,
+                        struct sw_flow_actions **sfa)
+{
+       const struct nlattr *a;
+       int rem, err;
+
+       if (depth >= SAMPLE_ACTION_DEPTH)
+               return -EOVERFLOW;
+
+       nla_for_each_nested(a, attr, rem) {
+               /* Expected argument lengths, (u32)-1 for variable length. */
+               static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+                       [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+                       [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+                       [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+                       [OVS_ACTION_ATTR_POP_VLAN] = 0,
+                       [OVS_ACTION_ATTR_SET] = (u32)-1,
+                       [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+               };
+               const struct ovs_action_push_vlan *vlan;
+               int type = nla_type(a);
+               bool skip_copy;
+
+               if (type > OVS_ACTION_ATTR_MAX ||
+                   (action_lens[type] != nla_len(a) &&
+                    action_lens[type] != (u32)-1))
+                       return -EINVAL;
+
+               skip_copy = false;
+               switch (type) {
+               case OVS_ACTION_ATTR_UNSPEC:
+                       return -EINVAL;
+
+               case OVS_ACTION_ATTR_USERSPACE:
+                       err = validate_userspace(a);
+                       if (err)
+                               return err;
+                       break;
+
+               case OVS_ACTION_ATTR_OUTPUT:
+                       if (nla_get_u32(a) >= DP_MAX_PORTS)
+                               return -EINVAL;
+                       break;
+
+               case OVS_ACTION_ATTR_POP_VLAN:
+                       break;
+
+               case OVS_ACTION_ATTR_PUSH_VLAN:
+                       vlan = nla_data(a);
+                       if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+                               return -EINVAL;
+                       if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+                               return -EINVAL;
+                       break;
+
+               case OVS_ACTION_ATTR_SET:
+                       err = validate_set(a, key, sfa, &skip_copy);
+                       if (err)
+                               return err;
+                       break;
+
+               case OVS_ACTION_ATTR_SAMPLE:
+                       err = validate_and_copy_sample(a, key, depth, sfa);
+                       if (err)
+                               return err;
+                       skip_copy = true;
+                       break;
+
+               default:
+                       return -EINVAL;
+               }
+               if (!skip_copy) {
+                       err = copy_action(a, sfa);
+                       if (err)
+                               return err;
+               }
+       }
+
+       if (rem > 0)
+               return -EINVAL;
+
+       return 0;
+}
+
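+/* Converts a previously copied sample action back into its Netlink
+ * OVS_ACTION_ATTR_SAMPLE representation on 'skb'. */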
+static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+       const struct nlattr *a;
+       struct nlattr *start;
+       int err = 0, rem;
+
+       start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
+       if (!start)
+               return -EMSGSIZE;
+
+       nla_for_each_nested(a, attr, rem) {
+               int type = nla_type(a);
+               struct nlattr *st_sample;
+
+               switch (type) {
+               case OVS_SAMPLE_ATTR_PROBABILITY:
+                       if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
+                                   sizeof(u32), nla_data(a)))
+                               return -EMSGSIZE;
+                       break;
+               case OVS_SAMPLE_ATTR_ACTIONS:
+                       st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+                       if (!st_sample)
+                               return -EMSGSIZE;
+                       err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+                       if (err)
+                               return err;
+                       nla_nest_end(skb, st_sample);
+                       break;
+               }
+       }
+
+       nla_nest_end(skb, start);
+       return err;
+}
+
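+/* Converts a previously copied set action back into its Netlink
+ * OVS_ACTION_ATTR_SET representation; tunnel sets are re-expanded into
+ * nested tunnel attributes. */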
+static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
+{
+       const struct nlattr *ovs_key = nla_data(a);
+       int key_type = nla_type(ovs_key);
+       struct nlattr *start;
+       int err;
+
+       switch (key_type) {
+       case OVS_KEY_ATTR_IPV4_TUNNEL:
+               start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+               if (!start)
+                       return -EMSGSIZE;
+
+               err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+                                            nla_data(ovs_key));
+               if (err)
+                       return err;
+               nla_nest_end(skb, start);
+               break;
+       default:
+               if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
+                       return -EMSGSIZE;
+               break;
+       }
+
+       return 0;
+}
+
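+/* Serializes the action list in 'attr' (of 'len' bytes) onto 'skb',
+ * translating set and sample actions back into their Netlink forms. */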
+int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
+{
+       const struct nlattr *a;
+       int rem, err;
+
+       nla_for_each_attr(a, attr, len, rem) {
+               int type = nla_type(a);
+
+               switch (type) {
+               case OVS_ACTION_ATTR_SET:
+                       err = set_action_to_attr(a, skb);
+                       if (err)
+                               return err;
+                       break;
+
+               case OVS_ACTION_ATTR_SAMPLE:
+                       err = sample_action_to_attr(a, skb);
+                       if (err)
+                               return err;
+                       break;
+               default:
+                       if (nla_put(skb, type, nla_len(a), nla_data(a)))
+                               return -EMSGSIZE;
+                       break;
+               }
+       }
+
+       return 0;
+}
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
new file mode 100644 (file)
index 0000000..4401510
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+
+#ifndef FLOW_NETLINK_H
+#define FLOW_NETLINK_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+void ovs_match_init(struct sw_flow_match *match,
+                   struct sw_flow_key *key, struct sw_flow_mask *mask);
+
+int ovs_nla_put_flow(const struct sw_flow_key *,
+                    const struct sw_flow_key *, struct sk_buff *);
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+                             const struct nlattr *attr);
+int ovs_nla_get_match(struct sw_flow_match *match,
+                     const struct nlattr *,
+                     const struct nlattr *);
+
+int ovs_nla_copy_actions(const struct nlattr *attr,
+                        const struct sw_flow_key *key, int depth,
+                        struct sw_flow_actions **sfa);
+int ovs_nla_put_actions(const struct nlattr *attr,
+                       int len, struct sk_buff *skb);
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
+void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+
+#endif /* flow_netlink.h */
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
new file mode 100644 (file)
index 0000000..98eb809
--- /dev/null
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#include "datapath.h"
+#include "vlan.h"
+
+#define TBL_MIN_BUCKETS                1024
+#define REHASH_INTERVAL                (10 * 60 * HZ)
+
+static struct kmem_cache *flow_cache;
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+       return range->end - range->start;
+}
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+                      const struct sw_flow_mask *mask)
+{
+       const long *m = (long *)((u8 *)&mask->key + mask->range.start);
+       const long *s = (long *)((u8 *)src + mask->range.start);
+       long *d = (long *)((u8 *)dst + mask->range.start);
+       int i;
+
+       /* The memory outside of 'mask->range' is not set, since
+        * further operations on 'dst' only use contents within
+        * 'mask->range'.
+        */
+       for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+               *d++ = *s++ & *m++;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+       struct sw_flow *flow;
+
+       flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+       if (!flow)
+               return ERR_PTR(-ENOMEM);
+
+       spin_lock_init(&flow->lock);
+       flow->sf_acts = NULL;
+       flow->mask = NULL;
+
+       return flow;
+}
+
+int ovs_flow_tbl_count(struct flow_table *table)
+{
+       return table->count;
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+       struct flex_array *buckets;
+       int i, err;
+
+       buckets = flex_array_alloc(sizeof(struct hlist_head),
+                                  n_buckets, GFP_KERNEL);
+       if (!buckets)
+               return NULL;
+
+       err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+       if (err) {
+               flex_array_free(buckets);
+               return NULL;
+       }
+
+       for (i = 0; i < n_buckets; i++)
+               INIT_HLIST_HEAD((struct hlist_head *)
+                                       flex_array_get(buckets, i));
+
+       return buckets;
+}
+
+static void flow_free(struct sw_flow *flow)
+{
+       kfree((struct sf_flow_acts __force *)flow->sf_acts);
+       kmem_cache_free(flow_cache, flow);
+}
+
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+       struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+       flow_free(flow);
+}
+
+static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
+{
+       struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);
+
+       kfree(mask);
+}
+
+static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
+{
+       if (!mask)
+               return;
+
+       BUG_ON(!mask->ref_count);
+       mask->ref_count--;
+
+       if (!mask->ref_count) {
+               list_del_rcu(&mask->list);
+               if (deferred)
+                       call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
+               else
+                       kfree(mask);
+       }
+}
+
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
+{
+       if (!flow)
+               return;
+
+       flow_mask_del_ref(flow->mask, deferred);
+
+       if (deferred)
+               call_rcu(&flow->rcu, rcu_free_flow_callback);
+       else
+               flow_free(flow);
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+       flex_array_free(buckets);
+}
+
+static void __table_instance_destroy(struct table_instance *ti)
+{
+       int i;
+
+       if (ti->keep_flows)
+               goto skip_flows;
+
+       for (i = 0; i < ti->n_buckets; i++) {
+               struct sw_flow *flow;
+               struct hlist_head *head = flex_array_get(ti->buckets, i);
+               struct hlist_node *n;
+               int ver = ti->node_ver;
+
+               hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+                       hlist_del(&flow->hash_node[ver]);
+                       ovs_flow_free(flow, false);
+               }
+       }
+
+skip_flows:
+       free_buckets(ti->buckets);
+       kfree(ti);
+}
+
+static struct table_instance *table_instance_alloc(int new_size)
+{
+       struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+       if (!ti)
+               return NULL;
+
+       ti->buckets = alloc_buckets(new_size);
+
+       if (!ti->buckets) {
+               kfree(ti);
+               return NULL;
+       }
+       ti->n_buckets = new_size;
+       ti->node_ver = 0;
+       ti->keep_flows = false;
+       get_random_bytes(&ti->hash_seed, sizeof(u32));
+
+       return ti;
+}
+
+int ovs_flow_tbl_init(struct flow_table *table)
+{
+       struct table_instance *ti;
+
+       ti = table_instance_alloc(TBL_MIN_BUCKETS);
+
+       if (!ti)
+               return -ENOMEM;
+
+       rcu_assign_pointer(table->ti, ti);
+       INIT_LIST_HEAD(&table->mask_list);
+       table->last_rehash = jiffies;
+       table->count = 0;
+       return 0;
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+       struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
+
+       __table_instance_destroy(ti);
+}
+
+static void table_instance_destroy(struct table_instance *ti, bool deferred)
+{
+       if (!ti)
+               return;
+
+       if (deferred)
+               call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+       else
+               __table_instance_destroy(ti);
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+       struct table_instance *ti = ovsl_dereference(table->ti);
+
+       table_instance_destroy(ti, false);
+}
+
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
+                                      u32 *bucket, u32 *last)
+{
+       struct sw_flow *flow;
+       struct hlist_head *head;
+       int ver;
+       int i;
+
+       ver = ti->node_ver;
+       while (*bucket < ti->n_buckets) {
+               i = 0;
+               head = flex_array_get(ti->buckets, *bucket);
+               hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+                       if (i < *last) {
+                               i++;
+                               continue;
+                       }
+                       *last = i + 1;
+                       return flow;
+               }
+               (*bucket)++;
+               *last = 0;
+       }
+
+       return NULL;
+}
+
+static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
+{
+       hash = jhash_1word(hash, ti->hash_seed);
+       return flex_array_get(ti->buckets,
+                               (hash & (ti->n_buckets - 1)));
+}
+
+static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+{
+       struct hlist_head *head;
+
+       head = find_bucket(ti, flow->hash);
+       hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+}
+
+static void flow_table_copy_flows(struct table_instance *old,
+                                 struct table_instance *new)
+{
+       int old_ver;
+       int i;
+
+       old_ver = old->node_ver;
+       new->node_ver = !old_ver;
+
+       /* Insert in new table. */
+       for (i = 0; i < old->n_buckets; i++) {
+               struct sw_flow *flow;
+               struct hlist_head *head;
+
+               head = flex_array_get(old->buckets, i);
+
+               hlist_for_each_entry(flow, head, hash_node[old_ver])
+                       table_instance_insert(new, flow);
+       }
+
+       old->keep_flows = true;
+}
+
+static struct table_instance *table_instance_rehash(struct table_instance *ti,
+                                           int n_buckets)
+{
+       struct table_instance *new_ti;
+
+       new_ti = table_instance_alloc(n_buckets);
+       if (!new_ti)
+               return NULL;
+
+       flow_table_copy_flows(ti, new_ti);
+
+       return new_ti;
+}
+
+int ovs_flow_tbl_flush(struct flow_table *flow_table)
+{
+       struct table_instance *old_ti;
+       struct table_instance *new_ti;
+
+       old_ti = ovsl_dereference(flow_table->ti);
+       new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+       if (!new_ti)
+               return -ENOMEM;
+
+       rcu_assign_pointer(flow_table->ti, new_ti);
+       flow_table->last_rehash = jiffies;
+       flow_table->count = 0;
+
+       table_instance_destroy(old_ti, true);
+       return 0;
+}
+
+static u32 flow_hash(const struct sw_flow_key *key, int key_start,
+                    int key_end)
+{
+       u32 *hash_key = (u32 *)((u8 *)key + key_start);
+       int hash_u32s = (key_end - key_start) >> 2;
+
+       /* Make sure the number of hash bytes is a multiple of u32. */
+       BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+       return jhash2(hash_key, hash_u32s, 0);
+}
+
+static int flow_key_start(const struct sw_flow_key *key)
+{
+       if (key->tun_key.ipv4_dst)
+               return 0;
+       else
+               return rounddown(offsetof(struct sw_flow_key, phy),
+                                         sizeof(long));
+}
+
+static bool cmp_key(const struct sw_flow_key *key1,
+                   const struct sw_flow_key *key2,
+                   int key_start, int key_end)
+{
+       const long *cp1 = (long *)((u8 *)key1 + key_start);
+       const long *cp2 = (long *)((u8 *)key2 + key_start);
+       long diffs = 0;
+       int i;
+
+       for (i = key_start; i < key_end;  i += sizeof(long))
+               diffs |= *cp1++ ^ *cp2++;
+
+       return diffs == 0;
+}
+
+static bool flow_cmp_masked_key(const struct sw_flow *flow,
+                               const struct sw_flow_key *key,
+                               int key_start, int key_end)
+{
+       return cmp_key(&flow->key, key, key_start, key_end);
+}
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+                              struct sw_flow_match *match)
+{
+       struct sw_flow_key *key = match->key;
+       int key_start = flow_key_start(key);
+       int key_end = match->range.end;
+
+       return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+}
+
+static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
+                                         const struct sw_flow_key *unmasked,
+                                         struct sw_flow_mask *mask)
+{
+       struct sw_flow *flow;
+       struct hlist_head *head;
+       int key_start = mask->range.start;
+       int key_end = mask->range.end;
+       u32 hash;
+       struct sw_flow_key masked_key;
+
+       ovs_flow_mask_key(&masked_key, unmasked, mask);
+       hash = flow_hash(&masked_key, key_start, key_end);
+       head = find_bucket(ti, hash);
+       hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
+               if (flow->mask == mask &&
+                   flow_cmp_masked_key(flow, &masked_key,
+                                         key_start, key_end))
+                       return flow;
+       }
+       return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
+                                   const struct sw_flow_key *key)
+{
+       struct table_instance *ti = rcu_dereference(tbl->ti);
+       struct sw_flow_mask *mask;
+       struct sw_flow *flow;
+
+       list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
+               flow = masked_flow_lookup(ti, key, mask);
+               if (flow)  /* Found */
+                       return flow;
+       }
+       return NULL;
+}
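The functions above implement the table's lookup strategy: ovs_flow_mask_key() ANDs the packet key with a mask over that mask's byte range, flow_hash() hashes only that range, and masked_flow_lookup()/ovs_flow_tbl_lookup() try each mask on tbl->mask_list in turn until a flow inserted under the same mask with the same masked key is found.  A minimal userspace-style sketch of the same idea, with hypothetical toy_* names and the hash-bucket and RCU details elided:

/* Sketch only: simplified model of the per-mask lookup above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_key  { uint32_t in_port; uint32_t ip_dst; };
struct toy_mask { struct toy_key bits; };
struct toy_flow { struct toy_key masked_key; const struct toy_mask *mask; };

/* AND the lookup key with a mask, as ovs_flow_mask_key() does over
 * mask->range. */
static void
toy_mask_key(struct toy_key *dst, const struct toy_key *src,
             const struct toy_mask *mask)
{
    dst->in_port = src->in_port & mask->bits.in_port;
    dst->ip_dst  = src->ip_dst  & mask->bits.ip_dst;
}

/* Try each mask in turn: mask the packet key, then look for a flow that was
 * inserted under the same mask with the same masked key.  (The real code
 * hashes the masked range and probes one bucket instead of scanning.) */
static const struct toy_flow *
toy_lookup(const struct toy_flow *flows, int n_flows,
           const struct toy_mask *masks, int n_masks,
           const struct toy_key *packet_key)
{
    int i, j;

    for (i = 0; i < n_masks; i++) {
        struct toy_key masked;

        toy_mask_key(&masked, packet_key, &masks[i]);
        for (j = 0; j < n_flows; j++) {
            if (flows[j].mask == &masks[i]
                && !memcmp(&flows[j].masked_key, &masked, sizeof masked)) {
                return &flows[j];
            }
        }
    }
    return NULL;
}

int
main(void)
{
    struct toy_mask masks[] = {
        { .bits = { .in_port = 0xffffffff, .ip_dst = 0 } }, /* in_port only */
    };
    struct toy_flow flows[] = {
        { .masked_key = { .in_port = 5, .ip_dst = 0 }, .mask = &masks[0] },
    };
    struct toy_key pkt = { .in_port = 5, .ip_dst = 0x0a000001 };

    printf("hit: %s\n", toy_lookup(flows, 1, masks, 1, &pkt) ? "yes" : "no");
    return 0;
}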
+
+static struct table_instance *table_instance_expand(struct table_instance *ti)
+{
+       return table_instance_rehash(ti, ti->n_buckets * 2);
+}
+
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+       struct table_instance *ti = ovsl_dereference(table->ti);
+
+       BUG_ON(table->count == 0);
+       hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+       table->count--;
+}
+
+static struct sw_flow_mask *mask_alloc(void)
+{
+       struct sw_flow_mask *mask;
+
+       mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+       if (mask)
+               mask->ref_count = 0;
+
+       return mask;
+}
+
+static void mask_add_ref(struct sw_flow_mask *mask)
+{
+       mask->ref_count++;
+}
+
+static bool mask_equal(const struct sw_flow_mask *a,
+                      const struct sw_flow_mask *b)
+{
+       u8 *a_ = (u8 *)&a->key + a->range.start;
+       u8 *b_ = (u8 *)&b->key + b->range.start;
+
+       return  (a->range.end == b->range.end)
+               && (a->range.start == b->range.start)
+               && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
+                                          const struct sw_flow_mask *mask)
+{
+       struct list_head *ml;
+
+       list_for_each(ml, &tbl->mask_list) {
+               struct sw_flow_mask *m;
+               m = container_of(ml, struct sw_flow_mask, list);
+               if (mask_equal(mask, m))
+                       return m;
+       }
+
+       return NULL;
+}
+
+/**
+ * Add a mask equal to 'new' to the mask list.  If an entry with the same key
+ * and range is already on the list it is reused (and another reference taken
+ * on it); otherwise a fresh entry is allocated.  'new' itself is only a
+ * template and is never linked into the list.
+ */
+static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
+                           struct sw_flow_mask *new)
+{
+       struct sw_flow_mask *mask;
+       mask = flow_mask_find(tbl, new);
+       if (!mask) {
+               /* Allocate a new mask if none exists. */
+               mask = mask_alloc();
+               if (!mask)
+                       return -ENOMEM;
+               mask->key = new->key;
+               mask->range = new->range;
+               list_add_rcu(&mask->list, &tbl->mask_list);
+       }
+
+       mask_add_ref(mask);
+       flow->mask = mask;
+       return 0;
+}
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+                       struct sw_flow_mask *mask)
+{
+       struct table_instance *new_ti = NULL;
+       struct table_instance *ti;
+       int err;
+
+       err = flow_mask_insert(table, flow, mask);
+       if (err)
+               return err;
+
+       flow->hash = flow_hash(&flow->key, flow->mask->range.start,
+                       flow->mask->range.end);
+       ti = ovsl_dereference(table->ti);
+       table_instance_insert(ti, flow);
+       table->count++;
+
+       /* Expand table, if necessary, to make room. */
+       if (table->count > ti->n_buckets)
+               new_ti = table_instance_expand(ti);
+       else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
+               new_ti = table_instance_rehash(ti, ti->n_buckets);
+
+       if (new_ti) {
+               rcu_assign_pointer(table->ti, new_ti);
+               table_instance_destroy(ti, true);
+               table->last_rehash = jiffies;
+       }
+       return 0;
+}
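ovs_flow_tbl_insert() above also decides when to resize: the bucket array doubles once there are more flows than buckets, and otherwise the table is rebuilt at the same size every REHASH_INTERVAL, which gives the new table_instance a fresh random hash_seed.  A small standalone sketch of that policy (hypothetical toy_* names, times in milliseconds rather than jiffies):

/* Sketch only: the resize policy applied after inserting a flow. */
enum toy_resize { TOY_KEEP, TOY_EXPAND, TOY_REHASH };

static enum toy_resize
toy_resize_decision(unsigned int count, unsigned int n_buckets,
                    unsigned long long now_ms,
                    unsigned long long last_rehash_ms)
{
    if (count > n_buckets) {
        /* More flows than buckets: double the bucket array, as
         * table_instance_expand() does. */
        return TOY_EXPAND;
    } else if (now_ms - last_rehash_ms > 10ULL * 60 * 1000) {
        /* REHASH_INTERVAL (10 minutes) elapsed: rebuild at the same size;
         * the replacement instance picks a new random hash_seed. */
        return TOY_REHASH;
    }
    return TOY_KEEP;
}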
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+       BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
+       BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
+       flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+                                       0, NULL);
+       if (flow_cache == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+       kmem_cache_destroy(flow_cache);
+}
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
new file mode 100644 (file)
index 0000000..4db5f78
--- /dev/null
+++ b/datapath/flow_table.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_TABLE_H
+#define FLOW_TABLE_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+struct table_instance {
+       struct flex_array *buckets;
+       unsigned int n_buckets;
+       struct rcu_head rcu;
+       int node_ver;
+       u32 hash_seed;
+       bool keep_flows;
+};
+
+struct flow_table {
+       struct table_instance __rcu *ti;
+       struct list_head mask_list;
+       unsigned long last_rehash;
+       unsigned int count;
+};
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_free(struct sw_flow *, bool deferred);
+
+int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_count(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+int ovs_flow_tbl_flush(struct flow_table *flow_table);
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+                       struct sw_flow_mask *mask);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
+                                      u32 *bucket, u32 *idx);
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
+                                   const struct sw_flow_key *);
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+                              struct sw_flow_match *match);
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+                      const struct sw_flow_mask *mask);
+#endif /* flow_table.h */
diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index d74ad3c..32b1770 100644 (file)
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -15,6 +15,8 @@
 /flex_array.c
 /flow.c
 /flow_dissector.c
+/flow_netlink.c
+/flow_table.c
 /genetlink-openvswitch.c
 /genl_exec.c
 /gre.c
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 4f7671b..09d0fd7 100644 (file)
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -219,8 +219,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
        int min_headroom;
        int err;
 
-       skb_reset_inner_headers(skb);
-
        min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
                        + VXLAN_HLEN + sizeof(struct iphdr)
                        + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
@@ -239,6 +237,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
                vlan_set_tci(skb, 0);
        }
 
+       skb_reset_inner_headers(skb);
+
        vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
        vxh->vx_flags = htonl(VXLAN_FLAGS);
        vxh->vx_vni = vni;
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index de5ff6a..ca272fd 100644 (file)
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1717,8 +1717,9 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  *
  * Format: 20-bit IPv6 flow label in least-significant bits.
  *
- * Masking: Not maskable. */
-#define NXM_NX_IPV6_LABEL  NXM_HEADER  (0x0001, 27, 4)
+ * Masking: Fully maskable. */
+#define NXM_NX_IPV6_LABEL   NXM_HEADER  (0x0001, 27, 4)
+#define NXM_NX_IPV6_LABEL_W NXM_HEADER_W(0x0001, 27, 4)
 
 /* The ECN of the IP header.
  *
diff --git a/include/openflow/openflow-common.h b/include/openflow/openflow-common.h
index 5018f85..45d03ef 100644 (file)
--- a/include/openflow/openflow-common.h
+++ b/include/openflow/openflow-common.h
@@ -104,8 +104,8 @@ enum ofp_version {
 #define OFP_MAX_TABLE_NAME_LEN 32
 #define OFP_MAX_PORT_NAME_LEN  16
 
-#define OFP_TCP_PORT  6633
-#define OFP_SSL_PORT  6633
+#define OFP_OLD_PORT  6633
+#define OFP_PORT  6653
 
 #define OFP_ETH_ALEN 6          /* Bytes in an Ethernet address. */
 
diff --git a/lib/aes128.c b/lib/aes128.c
index 9d8d2fc..98447d1 100644 (file)
--- a/lib/aes128.c
+++ b/lib/aes128.c
@@ -363,341 +363,6 @@ static const uint32_t Te4[256] = {
     0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
 };
 
-static const uint32_t Td0[256] = {
-    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
-    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
-    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
-    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
-    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
-    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
-    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
-    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
-    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
-    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
-    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
-    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
-    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
-    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
-    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
-    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
-    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
-    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
-    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
-    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
-    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
-    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
-    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
-    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
-    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
-    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
-    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
-    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
-    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
-    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
-    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
-    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
-    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
-    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
-    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
-    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
-    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
-    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
-    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
-    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
-    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
-    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
-    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
-    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
-    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
-    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
-    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
-    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
-    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
-    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
-    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
-    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
-    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
-    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
-    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
-    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
-    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
-    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
-    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
-    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
-    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
-    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
-    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
-    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
-};
-
-static const uint32_t Td1[256] = {
-    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
-    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
-    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
-    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
-    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
-    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
-    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
-    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
-    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
-    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
-    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
-    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
-    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
-    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
-    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
-    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
-    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
-    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
-    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
-    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
-    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
-    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
-    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
-    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
-    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
-    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
-    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
-    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
-    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
-    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
-    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
-    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
-    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
-    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
-    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
-    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
-    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
-    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
-    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
-    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
-    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
-    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
-    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
-    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
-    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
-    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
-    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
-    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
-    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
-    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
-    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
-    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
-    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
-    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
-    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
-    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
-    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
-    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
-    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
-    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
-    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
-    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
-    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
-    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
-};
-
-static const uint32_t Td2[256] = {
-    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
-    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
-    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
-    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
-    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
-    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
-    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
-    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
-    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
-    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
-    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
-    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
-    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
-    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
-    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
-    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
-    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
-    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
-    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
-    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
-    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
-    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
-    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
-    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
-    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
-    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
-    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
-    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
-    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
-    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
-    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
-    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
-    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
-    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
-    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
-    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
-    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
-    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
-    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
-    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
-    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
-    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
-    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
-    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
-    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
-    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
-    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
-    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
-    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
-    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
-    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
-    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
-    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
-    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
-    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
-    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
-    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
-    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
-    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
-    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
-    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
-    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
-    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
-};
-
-static const uint32_t Td3[256] = {
-    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
-    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
-    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
-    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
-    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
-    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
-    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
-    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
-    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
-    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
-    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
-    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
-    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
-    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
-    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
-    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
-    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
-    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
-    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
-    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
-    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
-    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
-    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
-    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
-    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
-    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
-    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
-    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
-    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
-    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
-    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
-    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
-    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
-    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
-    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
-    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
-    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
-    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
-    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
-    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
-    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
-    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
-    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
-    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
-    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
-    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
-    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
-    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
-    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
-    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
-    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
-    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
-    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
-    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
-    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
-    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
-    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
-    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
-    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
-    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
-    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
-    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
-    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
-    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
-};
-
-static const uint32_t Td4[256] = {
-    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
-    0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
-    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
-    0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
-    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
-    0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
-    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
-    0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
-    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
-    0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
-    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
-    0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
-    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
-    0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
-    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
-    0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
-    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
-    0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
-    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
-    0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
-    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
-    0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
-    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
-    0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
-    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
-    0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
-    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
-    0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
-    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
-    0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
-    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
-    0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
-    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
-    0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
-    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
-    0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
-    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
-    0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
-    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
-    0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
-    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
-    0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
-    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
-    0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
-    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
-    0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
-    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
-    0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
-    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
-    0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
-    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
-    0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
-    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
-    0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
-    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
-    0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
-    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
-    0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
-    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
-    0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
-    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
-    0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
-    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
-    0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
-};
-
 static const uint32_t rcon[] = {
     0x01000000, 0x02000000, 0x04000000, 0x08000000,
     0x10000000, 0x20000000, 0x40000000, 0x80000000,
diff --git a/lib/automake.mk b/lib/automake.mk
index b2d6dc2..ffaf89a 100644 (file)
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -201,6 +201,8 @@ lib_libopenvswitch_a_SOURCES = \
        lib/svec.h \
        lib/table.c \
        lib/table.h \
+       lib/tag.c \
+       lib/tag.h \
        lib/timer.c \
        lib/timer.h \
        lib/timeval.c \
diff --git a/lib/classifier.c b/lib/classifier.c
index 36eb1f0..53487a4 100644 (file)
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -154,6 +154,7 @@ classifier_init(struct classifier *cls)
     cls->n_rules = 0;
     hmap_init(&cls->tables);
     list_init(&cls->tables_priority);
+    hmap_init(&cls->partitions);
     ovs_rwlock_init(&cls->rwlock);
 }
 
@@ -163,12 +164,20 @@ void
 classifier_destroy(struct classifier *cls)
 {
     if (cls) {
+        struct cls_table *partition, *next_partition;
         struct cls_table *table, *next_table;
 
         HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) {
             destroy_table(cls, table);
         }
         hmap_destroy(&cls->tables);
+
+        HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node,
+                            &cls->partitions) {
+            hmap_remove(&cls->partitions, &partition->hmap_node);
+            free(partition);
+        }
+        hmap_destroy(&cls->partitions);
         ovs_rwlock_destroy(&cls->rwlock);
     }
 }
@@ -187,6 +196,44 @@ classifier_count(const struct classifier *cls)
     return cls->n_rules;
 }
 
+static uint32_t
+hash_metadata(ovs_be64 metadata_)
+{
+    uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
+    return hash_2words(metadata, metadata >> 32);
+}
+
+static struct cls_partition *
+find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
+{
+    struct cls_partition *partition;
+
+    HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) {
+        if (partition->metadata == metadata) {
+            return partition;
+        }
+    }
+
+    return NULL;
+}
+
+static struct cls_partition *
+create_partition(struct classifier *cls, struct cls_table *table,
+                 ovs_be64 metadata)
+{
+    uint32_t hash = hash_metadata(metadata);
+    struct cls_partition *partition = find_partition(cls, metadata, hash);
+    if (!partition) {
+        partition = xmalloc(sizeof *partition);
+        partition->metadata = metadata;
+        partition->tags = 0;
+        tag_tracker_init(&partition->tracker);
+        hmap_insert(&cls->partitions, &partition->hmap_node, hash);
+    }
+    tag_tracker_add(&partition->tracker, &partition->tags, table->tag);
+    return partition;
+}
+
 /* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
  * must not modify or free it.
  *
@@ -213,8 +260,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
 
     old_rule = insert_rule(cls, table, rule);
     if (!old_rule) {
+        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
+            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
+            rule->partition = create_partition(cls, table, metadata);
+        } else {
+            rule->partition = NULL;
+        }
+
         table->n_table_rules++;
         cls->n_rules++;
+    } else {
+        rule->partition = old_rule->partition;
     }
     return old_rule;
 }
@@ -238,6 +294,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule)
 void
 classifier_remove(struct classifier *cls, struct cls_rule *rule)
 {
+    struct cls_partition *partition;
     struct cls_rule *head;
     struct cls_table *table;
 
@@ -255,6 +312,16 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
         hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node);
     }
 
+    partition = rule->partition;
+    if (partition) {
+        tag_tracker_subtract(&partition->tracker, &partition->tags,
+                             table->tag);
+        if (!partition->tags) {
+            hmap_remove(&cls->partitions, &partition->hmap_node);
+            free(partition);
+        }
+    }
+
     if (--table->n_table_rules == 0) {
         destroy_table(cls, table);
     } else {
@@ -275,13 +342,44 @@ struct cls_rule *
 classifier_lookup(const struct classifier *cls, const struct flow *flow,
                   struct flow_wildcards *wc)
 {
+    const struct cls_partition *partition;
     struct cls_table *table;
     struct cls_rule *best;
+    tag_type tags;
+
+    /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then
+     * 'flow' cannot possibly match in 'table':
+     *
+     *     - If flow->metadata maps to a given 'partition', then we can use
+     *       'partition->tags' for 'tags'.
+     *
+     *     - If flow->metadata has no partition, then no rule in 'cls' has an
+     *       exact-match for flow->metadata.  That means that we don't need to
+     *       search any table that includes flow->metadata in its mask.
+     *
+     * In either case, we always need to search any cls_table that does not
+     * include flow->metadata in its mask.  One way to do that would be to
+     * check the "cls_table"s explicitly for that, but that would require an
+     * extra branch per table.  Instead, we mark such a cls_table's 'tag' as
+     * TAG_ALL and make sure that 'tags' is never empty.  This means that
+     * 'tags' always intersects such a cls_table's 'tag', so we don't need a
+     * special case.
+     */
+    partition = (hmap_is_empty(&cls->partitions)
+                 ? NULL
+                 : find_partition(cls, flow->metadata,
+                                  hash_metadata(flow->metadata)));
+    tags = partition ? partition->tags : TAG_ARBITRARY;
 
     best = NULL;
     LIST_FOR_EACH (table, list_node, &cls->tables_priority) {
-        struct cls_rule *rule = find_match(table, flow);
+        struct cls_rule *rule;
 
+        if (!tag_intersects(tags, table->tag)) {
+            continue;
+        }
+
+        rule = find_match(table, flow);
         if (wc) {
             flow_wildcards_fold_minimask(wc, &table->mask);
         }
@@ -293,6 +391,10 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
                      * can not find anything better. */
                     return best;
                 }
+                if (!tag_intersects(tags, table->tag)) {
+                    continue;
+                }
+
                 rule = find_match(table, flow);
                 if (wc) {
                     flow_wildcards_fold_minimask(wc, &table->mask);
@@ -550,6 +652,7 @@ find_table(const struct classifier *cls, const struct minimask *mask)
 static struct cls_table *
 insert_table(struct classifier *cls, const struct minimask *mask)
 {
+    uint32_t hash = minimask_hash(mask, 0);
     struct cls_table *table;
 
     table = xzalloc(sizeof *table);
@@ -557,6 +660,9 @@ insert_table(struct classifier *cls, const struct minimask *mask)
     minimask_clone(&table->mask, mask);
     hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0));
     list_push_back(&cls->tables_priority, &table->list_node);
+    table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
+                  ? tag_create_deterministic(hash)
+                  : TAG_ALL);
 
     return table;
 }
@@ -668,8 +774,7 @@ find_match(const struct cls_table *table, const struct flow *flow)
     struct cls_rule *rule;
 
     HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
-        if (miniflow_equal_flow_in_minimask(&rule->match.flow, flow,
-                                            &table->mask)) {
+        if (minimatch_matches_flow(&rule->match, flow)) {
             return rule;
         }
     }
diff --git a/lib/classifier.h b/lib/classifier.h
index a795b4a..0e39012 100644 (file)
--- a/lib/classifier.h
+++ b/lib/classifier.h
 
 /* Flow classifier.
  *
- * A classifier is a "struct classifier",
- *      a hash map from a set of wildcards to a "struct cls_table",
- *              a hash map from fixed field values to "struct cls_rule",
- *                      which can contain a list of otherwise identical rules
- *                      with lower priorities.
+ *
+ * What?
+ * =====
+ *
+ * A flow classifier holds any number of "rules", each of which specifies
+ * values to match for some fields or subfields and a priority.  The primary
+ * design goal for the classifier is that, given a packet, it can as quickly as
+ * possible find the highest-priority rule that matches the packet.
+ *
+ * Each OpenFlow table is implemented as a flow classifier.
+ *
+ *
+ * Basic Design
+ * ============
+ *
+ * Suppose that all the rules in a classifier had the same form.  For example,
+ * suppose that they all matched on the source and destination Ethernet address
+ * and wildcarded all the other fields.  Then the obvious way to implement a
+ * classifier would be a hash table on the source and destination Ethernet
+ * addresses.  If new classification rules came along with a different form,
+ * you could add a second hash table that hashed on the fields matched in those
+ * rules.  With two hash tables, you look up a given flow in each hash table.
+ * If there are no matches, the classifier didn't contain a match; if you find
+ * a match in one of them, that's the result; if you find a match in both of
+ * them, then the result is the rule with the higher priority.
+ *
+ * This is how the classifier works.  In a "struct classifier", each form of
+ * "struct cls_rule" present (based on its ->match.mask) goes into a separate
+ * "struct cls_table".  A lookup does a hash lookup in every "struct cls_table"
+ * in the classifier and tracks the highest-priority match that it finds.  The
+ * tables are kept in a descending priority order according to the highest
+ * priority rule in each table, which allows lookup to skip over tables that
+ * can't possibly have a higher-priority match than already found.
+ *
+ * One detail: a classifier can contain multiple rules that are identical other
+ * than their priority.  When this happens, only the highest priority rule out
+ * of a group of otherwise identical rules is stored directly in the "struct
+ * cls_table", with the other almost-identical rules chained off a linked list
+ * inside that highest-priority rule.
+ *
+ *
+ * Partitioning
+ * ============
+ *
+ * Suppose that a given classifier is being used to handle multiple stages in a
+ * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field
+ * named "metadata") distinguishing between the different stages.  For example,
+ * metadata value 1 might identify ingress rules, metadata value 2 might
+ * identify ACLs, and metadata value 3 might identify egress rules.  Such a
+ * classifier is essentially partitioned into multiple sub-classifiers on the
+ * basis of the metadata value.
+ *
+ * The classifier has a special optimization to speed up matching in this
+ * scenario:
+ *
+ *     - Each cls_table that matches on metadata gets a tag derived from the
+ *       table's mask, so that it is likely that each table has a unique tag.
+ *       (Duplicate tags have a performance cost but do not affect
+ *       correctness.)
+ *
+ *     - For each metadata value matched by any cls_rule, the classifier
+ *       constructs a "struct cls_partition" indexed by the metadata value.
+ *       The cls_partition has a 'tags' member whose value is the bitwise-OR of
+ *       the tags of each cls_table that contains any rule that matches on the
+ *       cls_partition's metadata value.  In other words, struct cls_partition
+ *       associates metadata values with tables that need to be checked with
+ *       flows with that specific metadata value.
+ *
+ * Thus, a flow lookup can start by looking up the partition associated with
+ * the flow's metadata, and then skip over any cls_table whose 'tag' does not
+ * intersect the partition's 'tags'.  (The flow must also be looked up in any
+ * cls_table that doesn't match on metadata.  We handle that by giving any such
+ * cls_table TAG_ALL as its 'tag' so that it matches any tag.)
+ *
  *
  * Thread-safety
  * =============
 #include "hmap.h"
 #include "list.h"
 #include "match.h"
+#include "tag.h"
 #include "openflow/nicira-ext.h"
 #include "openflow/openflow.h"
 #include "ovs-thread.h"
@@ -54,6 +124,7 @@ struct classifier {
     int n_rules;                /* Total number of rules. */
     struct hmap tables;         /* Contains "struct cls_table"s.  */
     struct list tables_priority; /* Tables in descending priority order */
+    struct hmap partitions;     /* Contains "struct cls_partition"s. */
     struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
 };
 
@@ -66,6 +137,7 @@ struct cls_table {
     int n_table_rules;          /* Number of rules, including duplicates. */
     unsigned int max_priority;  /* Max priority of any rule in the table. */
     unsigned int max_count;     /* Count of max_priority rules. */
+    tag_type tag;               /* Tag generated from mask for partitioning. */
 };
 
 /* Returns true if 'table' is a "catch-all" table that will match every
@@ -82,6 +154,17 @@ struct cls_rule {
     struct list list;           /* List of identical, lower-priority rules. */
     struct minimatch match;     /* Matching rule. */
     unsigned int priority;      /* Larger numbers are higher priorities. */
+    struct cls_partition *partition;
+};
+
+/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
+ * field) with tags for the "cls_table"s that contain rules that match that
+ * metadata value.  */
+struct cls_partition {
+    struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */
+    ovs_be64 metadata;          /* metadata value for this partition. */
+    tag_type tags;              /* OR of each included flow's cls_table tag. */
+    struct tag_tracker tracker; /* Tracks the bits in 'tags'. */
 };
 
 void cls_rule_init(struct cls_rule *, const struct match *,
diff --git a/lib/coverage-unixctl.man b/lib/coverage-unixctl.man
index 9718894..8e5df81 100644 (file)
--- a/lib/coverage-unixctl.man
+++ b/lib/coverage-unixctl.man
@@ -8,4 +8,6 @@ main loop takes unusually long to run.
 Coverage counters are useful mainly for performance analysis and
 debugging.
 .IP "\fBcoverage/show\fR"
-Displays the values of all of the coverage counters.
+Displays the averaged per-second rates for the last few seconds, the
+last minute and the last hour, and the total counts of all of the
+coverage counters.
diff --git a/lib/coverage.c b/lib/coverage.c
index 23e2997..4364734 100644 (file)
--- a/lib/coverage.c
+++ b/lib/coverage.c
@@ -63,7 +63,14 @@ struct coverage_counter *coverage_counters[] = {
 
 static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER;
 
+static long long int coverage_run_time = LLONG_MIN;
+
+/* Index counter used to compute the moving average array's index. */
+static unsigned int idx_count = 0;
+
 static void coverage_read(struct svec *);
+static unsigned int coverage_array_sum(const unsigned int *arr,
+                                       const unsigned int len);
 
 static void
 coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
@@ -206,6 +213,7 @@ coverage_log(void)
 static void
 coverage_read(struct svec *lines)
 {
+    struct coverage_counter **c = coverage_counters;
     unsigned long long int *totals;
     size_t n_never_hit;
     uint32_t hash;
@@ -215,24 +223,37 @@ coverage_read(struct svec *lines)
 
     n_never_hit = 0;
     svec_add_nocopy(lines,
-                    xasprintf("Event coverage, hash=%08"PRIx32":", hash));
+                    xasprintf("Event coverage, avg rate over last: %d "
+                              "seconds, last minute, last hour,  "
+                              "hash=%08"PRIx32":",
+                              COVERAGE_RUN_INTERVAL/1000, hash));
 
     totals = xmalloc(n_coverage_counters * sizeof *totals);
     ovs_mutex_lock(&coverage_mutex);
     for (i = 0; i < n_coverage_counters; i++) {
-        totals[i] = coverage_counters[i]->total;
+        totals[i] = c[i]->total;
     }
     ovs_mutex_unlock(&coverage_mutex);
 
     for (i = 0; i < n_coverage_counters; i++) {
         if (totals[i]) {
-            svec_add_nocopy(lines, xasprintf("%-24s %9llu",
-                                             coverage_counters[i]->name,
-                                             totals[i]));
+            /* Shows the averaged per-second rates for the last
+             * COVERAGE_RUN_INTERVAL interval, the last minute and
+             * the last hour. */
+            svec_add_nocopy(lines,
+                xasprintf("%-24s %5.1f/sec %9.3f/sec "
+                          "%13.4f/sec   total: %llu",
+                          c[i]->name,
+                          (c[i]->min[(idx_count - 1) % MIN_AVG_LEN]
+                           * 1000.0 / COVERAGE_RUN_INTERVAL),
+                          coverage_array_sum(c[i]->min, MIN_AVG_LEN) / 60.0,
+                          coverage_array_sum(c[i]->hr,  HR_AVG_LEN) / 3600.0,
+                          totals[i]));
         } else {
             n_never_hit++;
         }
     }
+
     svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit));
     free(totals);
 }
@@ -249,3 +270,85 @@ coverage_clear(void)
     }
     ovs_mutex_unlock(&coverage_mutex);
 }
+
+/* Runs approximately every COVERAGE_RUN_INTERVAL milliseconds to update the
+ * coverage counters' 'min' and 'hr' arrays.  The 'min' array collects the
+ * per-interval counts that make up the last minute; the 'hr' array collects
+ * the per-minute counts that make up the last hour.  Any thread may call
+ * this function. */
+void
+coverage_run(void)
+{
+    /* Defines the moving average array index variables. */
+    static unsigned int min_idx, hr_idx;
+    struct coverage_counter **c = coverage_counters;
+    long long int now;
+
+    ovs_mutex_lock(&coverage_mutex);
+    now = time_msec();
+    /* Initialize the coverage_run_time. */
+    if (coverage_run_time == LLONG_MIN) {
+        coverage_run_time = now + COVERAGE_RUN_INTERVAL;
+    }
+
+    if (now >= coverage_run_time) {
+        size_t i, j;
+        /* Computes the number of COVERAGE_RUN_INTERVAL slots, since
+         * it is possible that the actual run interval is a multiple of
+         * COVERAGE_RUN_INTERVAL. */
+        int slots = (now - coverage_run_time) / COVERAGE_RUN_INTERVAL + 1;
+
+        for (i = 0; i < n_coverage_counters; i++) {
+            unsigned int count, portion;
+            unsigned int m_idx = min_idx;
+            unsigned int h_idx = hr_idx;
+            unsigned int idx = idx_count;
+
+            /* Computes the differences between the current total and the one
+             * recorded in last invocation of coverage_run(). */
+            count = c[i]->total - c[i]->last_total;
+            c[i]->last_total = c[i]->total;
+            /* The count over the time interval is evenly distributed
+             * among slots by calculating the portion. */
+            portion = count / slots;
+
+            for (j = 0; j < slots; j++) {
+                /* Updates the index variables. */
+                /* The m_idx is increased from 0 to MIN_AVG_LEN - 1. Every
+                 * time the m_idx finishes a cycle (a cycle is one minute),
+                 * the h_idx is incremented by 1. */
+                m_idx = idx % MIN_AVG_LEN;
+                h_idx = idx / MIN_AVG_LEN;
+
+                c[i]->min[m_idx] = portion + (j == (slots - 1)
+                                              ? count % slots : 0);
+                c[i]->hr[h_idx] = m_idx == 0
+                                  ? c[i]->min[m_idx]
+                                  : (c[i]->hr[h_idx] + c[i]->min[m_idx]);
+                /* This is to guarantee that h_idx ranges from 0 to 59. */
+                idx = (idx + 1) % (MIN_AVG_LEN * HR_AVG_LEN);
+            }
+        }
+
+        /* Updates the global index variables. */
+        idx_count = (idx_count + slots) % (MIN_AVG_LEN * HR_AVG_LEN);
+        min_idx = idx_count % MIN_AVG_LEN;
+        hr_idx  = idx_count / MIN_AVG_LEN;
+        /* Updates the run time. */
+        coverage_run_time = now + COVERAGE_RUN_INTERVAL;
+    }
+    ovs_mutex_unlock(&coverage_mutex);
+}
+
+static unsigned int
+coverage_array_sum(const unsigned int *arr, const unsigned int len)
+{
+    unsigned int sum = 0;
+    size_t i;
+
+    ovs_mutex_lock(&coverage_mutex);
+    for (i = 0; i < len; i++) {
+        sum += arr[i];
+    }
+    ovs_mutex_unlock(&coverage_mutex);
+    return sum;
+}
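
As a quick aid to reading coverage_run() above, the following standalone toy
(not OVS code) reproduces the same ring-buffer arithmetic with made-up event
counts.  It assumes the values this patch gives COVERAGE_RUN_INTERVAL (5000
ms), MIN_AVG_LEN (12) and HR_AVG_LEN (60); the real implementation keeps one
pair of arrays per counter and serializes updates with coverage_mutex.

    /* Toy model of the 'min'/'hr' ring-buffer update in coverage_run() above:
     * a count is spread evenly over the elapsed 5-second slots, the remainder
     * goes into the last slot, and each new minute restarts its hr[] bucket. */
    #include <stdio.h>

    #define MIN_AVG_LEN 12                  /* 5-second slots per minute. */
    #define HR_AVG_LEN  60                  /* Minutes per hour. */

    static unsigned int min[MIN_AVG_LEN];   /* Counts for the last minute. */
    static unsigned int hr[HR_AVG_LEN];     /* Counts for the last hour. */
    static unsigned int idx_count;          /* 0..MIN_AVG_LEN * HR_AVG_LEN - 1. */

    static void
    record(unsigned int count, int slots)
    {
        unsigned int portion = count / slots;
        int j;

        for (j = 0; j < slots; j++) {
            unsigned int m_idx = idx_count % MIN_AVG_LEN;
            unsigned int h_idx = idx_count / MIN_AVG_LEN;

            min[m_idx] = portion + (j == slots - 1 ? count % slots : 0);
            hr[h_idx] = m_idx == 0 ? min[m_idx] : hr[h_idx] + min[m_idx];
            idx_count = (idx_count + 1) % (MIN_AVG_LEN * HR_AVG_LEN);
        }
    }

    int
    main(void)
    {
        record(100, 1);       /* One on-time interval with 100 events. */
        record(7, 3);         /* Three missed intervals sharing 7 events. */
        printf("rate in newest slot: %.1f/sec\n",
               min[(idx_count - 1) % MIN_AVG_LEN] / 5.0);
        printf("events so far this minute: %u\n", hr[0]);
        return 0;
    }
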
diff --git a/lib/coverage.h b/lib/coverage.h
index 3d1a115..163728e 100644 (file)
--- a/lib/coverage.h
+++ b/lib/coverage.h
 #include "ovs-thread.h"
 #include "vlog.h"
 
+/* Makes coverage_run run every 5000 ms (5 seconds).
+ * If this value is redefined, the new value must
+ * divide 60000 (1 minute). */
+#define COVERAGE_RUN_INTERVAL    5000
+BUILD_ASSERT_DECL(60000 % COVERAGE_RUN_INTERVAL == 0);
+
+/* Defines the moving average array length. */
+#define MIN_AVG_LEN (60000/COVERAGE_RUN_INTERVAL)
+#define HR_AVG_LEN  60
+
 /* A coverage counter. */
 struct coverage_counter {
     const char *const name;            /* Textual name. */
     unsigned int (*const count)(void); /* Gets, zeros this thread's count. */
     unsigned long long int total;      /* Total count. */
+    unsigned long long int last_total;
+    /* The moving average arrays. */
+    unsigned int min[MIN_AVG_LEN];
+    unsigned int hr[HR_AVG_LEN];
 };
 
 /* Defines COUNTER.  There must be exactly one such definition at file scope
@@ -56,7 +70,7 @@ struct coverage_counter {
         }                                                               \
         extern struct coverage_counter counter_##COUNTER;               \
         struct coverage_counter counter_##COUNTER                       \
-            = { #COUNTER, COUNTER##_count, 0 };                         \
+            = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} };            \
         extern struct coverage_counter *counter_ptr_##COUNTER;          \
         struct coverage_counter *counter_ptr_##COUNTER                  \
             __attribute__((section("coverage"))) = &counter_##COUNTER
@@ -80,6 +94,7 @@ struct coverage_counter {
 void coverage_init(void);
 void coverage_log(void);
 void coverage_clear(void);
+void coverage_run(void);
 
 /* Implementation detail. */
 #define COVERAGE_DEFINE__(COUNTER)                              \
diff --git a/lib/dpif.c b/lib/dpif.c
index bb95502..1681911 100644 (file)
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -51,8 +51,6 @@ COVERAGE_DEFINE(dpif_flow_flush);
 COVERAGE_DEFINE(dpif_flow_get);
 COVERAGE_DEFINE(dpif_flow_put);
 COVERAGE_DEFINE(dpif_flow_del);
-COVERAGE_DEFINE(dpif_flow_query_list);
-COVERAGE_DEFINE(dpif_flow_query_list_n);
 COVERAGE_DEFINE(dpif_execute);
 COVERAGE_DEFINE(dpif_purge);
 
@@ -1352,7 +1350,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation,
     if (error) {
         ds_put_format(&ds, "(%s) ", ovs_strerror(error));
     }
-    odp_flow_format(key, key_len, mask, mask_len, &ds, true);
+    odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true);
     if (stats) {
         ds_put_cstr(&ds, ", ");
         dpif_flow_stats_format(stats, &ds);
diff --git a/lib/flow.c b/lib/flow.c
index 9ab1961..0678c6f 100644 (file)
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -36,9 +36,6 @@
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "unaligned.h"
-#include "vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(flow);
 
 COVERAGE_DEFINE(flow_extract);
 COVERAGE_DEFINE(miniflow_malloc);
@@ -103,9 +100,11 @@ static void
 parse_mpls(struct ofpbuf *b, struct flow *flow)
 {
     struct mpls_hdr *mh;
+    bool top = true;
 
     while ((mh = ofpbuf_try_pull(b, sizeof *mh))) {
-        if (flow->mpls_depth++ == 0) {
+        if (top) {
+            top = false;
             flow->mpls_lse = mh->mpls_lse;
         }
         if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) {
@@ -514,7 +513,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
 void
 flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
 
     fmd->tun_id = flow->tunnel.tun_id;
     fmd->tun_src = flow->tunnel.ip_src;
@@ -609,7 +608,6 @@ void
 flow_wildcards_init_exact(struct flow_wildcards *wc)
 {
     memset(&wc->masks, 0xff, sizeof wc->masks);
-    memset(wc->masks.zeros, 0, sizeof wc->masks.zeros);
 }
 
 /* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
@@ -1093,13 +1091,38 @@ miniflow_alloc_values(struct miniflow *flow, int n)
     }
 }
 
+/* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
+ * the caller.  The caller must have already initialized 'dst->map' properly
+ * to indicate the nonzero uint32_t elements of 'src'.  'n' must be the number
+ * of 1-bits in 'dst->map'.
+ *
+ * This function initializes 'dst->values' (either inline if possible or with
+ * malloc() otherwise) and copies the nonzero uint32_t elements of 'src' into
+ * it. */
+static void
+miniflow_init__(struct miniflow *dst, const struct flow *src, int n)
+{
+    const uint32_t *src_u32 = (const uint32_t *) src;
+    unsigned int ofs;
+    int i;
+
+    dst->values = miniflow_alloc_values(dst, n);
+    ofs = 0;
+    for (i = 0; i < MINI_N_MAPS; i++) {
+        uint32_t map;
+
+        for (map = dst->map[i]; map; map = zero_rightmost_1bit(map)) {
+            dst->values[ofs++] = src_u32[raw_ctz(map) + i * 32];
+        }
+    }
+}
+
 /* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
  * with miniflow_destroy(). */
 void
 miniflow_init(struct miniflow *dst, const struct flow *src)
 {
     const uint32_t *src_u32 = (const uint32_t *) src;
-    unsigned int ofs;
     unsigned int i;
     int n;
 
@@ -1113,16 +1136,17 @@ miniflow_init(struct miniflow *dst, const struct flow *src)
         }
     }
 
-    /* Initialize dst->values. */
-    dst->values = miniflow_alloc_values(dst, n);
-    ofs = 0;
-    for (i = 0; i < MINI_N_MAPS; i++) {
-        uint32_t map;
+    miniflow_init__(dst, src, n);
+}
 
-        for (map = dst->map[i]; map; map = zero_rightmost_1bit(map)) {
-            dst->values[ofs++] = src_u32[raw_ctz(map) + i * 32];
-        }
-    }
+/* Initializes 'dst' as a copy of 'src', using 'mask->map' as 'dst''s map.  The
+ * caller must eventually free 'dst' with miniflow_destroy(). */
+void
+miniflow_init_with_minimask(struct miniflow *dst, const struct flow *src,
+                            const struct minimask *mask)
+{
+    memcpy(dst->map, mask->masks.map, sizeof dst->map);
+    miniflow_init__(dst, src, miniflow_n_values(dst));
 }
 
 /* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
@@ -1220,16 +1244,35 @@ miniflow_get_vid(const struct miniflow *flow)
 bool
 miniflow_equal(const struct miniflow *a, const struct miniflow *b)
 {
+    const uint32_t *ap = a->values;
+    const uint32_t *bp = b->values;
     int i;
 
     for (i = 0; i < MINI_N_MAPS; i++) {
-        if (a->map[i] != b->map[i]) {
-            return false;
+        const uint32_t a_map = a->map[i];
+        const uint32_t b_map = b->map[i];
+        uint32_t map;
+
+        if (a_map == b_map) {
+            for (map = a_map; map; map = zero_rightmost_1bit(map)) {
+                if (*ap++ != *bp++) {
+                    return false;
+                }
+            }
+        } else {
+            for (map = a_map | b_map; map; map = zero_rightmost_1bit(map)) {
+                uint32_t bit = rightmost_1bit(map);
+                uint32_t a_value = a_map & bit ? *ap++ : 0;
+                uint32_t b_value = b_map & bit ? *bp++ : 0;
+
+                if (a_value != b_value) {
+                    return false;
+                }
+            }
         }
     }
 
-    return !memcmp(a->values, b->values,
-                   miniflow_n_values(a) * sizeof *a->values);
+    return true;
 }
 
 /* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
@@ -1289,10 +1332,24 @@ miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
 uint32_t
 miniflow_hash(const struct miniflow *flow, uint32_t basis)
 {
-    BUILD_ASSERT_DECL(MINI_N_MAPS == 2);
-    return hash_3words(flow->map[0], flow->map[1],
-                       hash_words(flow->values, miniflow_n_values(flow),
-                                  basis));
+    const uint32_t *p = flow->values;
+    uint32_t hash = basis;
+    int i;
+
+    for (i = 0; i < MINI_N_MAPS; i++) {
+        uint32_t hash_map = 0;
+        uint32_t map;
+
+        for (map = flow->map[i]; map; map = zero_rightmost_1bit(map)) {
+            if (*p) {
+                hash = mhash_add(hash, *p);
+                hash_map |= rightmost_1bit(map);
+            }
+            p++;
+        }
+        hash = mhash_add(hash, hash_map);
+    }
+    return mhash_finish(hash, p - flow->values);
 }
 
 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
@@ -1313,9 +1370,10 @@ miniflow_hash_in_minimask(const struct miniflow *flow,
         uint32_t map;
 
         for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
-            int ofs = raw_ctz(map) + i * 32;
-
-            hash = mhash_add(hash, miniflow_get(flow, ofs) & *p);
+            if (*p) {
+                int ofs = raw_ctz(map) + i * 32;
+                hash = mhash_add(hash, miniflow_get(flow, ofs) & *p);
+            }
             p++;
         }
     }
@@ -1332,21 +1390,23 @@ uint32_t
 flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
                       uint32_t basis)
 {
-    const uint32_t *flow_u32 = (const uint32_t *) flow;
+    const uint32_t *flow_u32;
     const uint32_t *p = mask->masks.values;
     uint32_t hash;
     int i;
 
     hash = basis;
+    flow_u32 = (const uint32_t *) flow;
     for (i = 0; i < MINI_N_MAPS; i++) {
         uint32_t map;
 
         for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
-            int ofs = raw_ctz(map) + i * 32;
-
-            hash = mhash_add(hash, flow_u32[ofs] & *p);
+            if (*p) {
+                hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p);
+            }
             p++;
         }
+        flow_u32 += 32;
     }
 
     return mhash_finish(hash, (p - mask->masks.values) * 4);
@@ -1487,7 +1547,17 @@ bool
 minimask_is_catchall(const struct minimask *mask_)
 {
     const struct miniflow *mask = &mask_->masks;
+    const uint32_t *p = mask->values;
+    int i;
 
-    BUILD_ASSERT(MINI_N_MAPS == 2);
-    return !(mask->map[0] | mask->map[1]);
+    for (i = 0; i < MINI_N_MAPS; i++) {
+        uint32_t map;
+
+        for (map = mask->map[i]; map; map = zero_rightmost_1bit(map)) {
+            if (*p++) {
+                return false;
+            }
+        }
+    }
+    return true;
 }
diff --git a/lib/flow.h b/lib/flow.h
index 75d95e8..4bd1504 100644 (file)
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -21,6 +21,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
+#include "byte-order.h"
 #include "openflow/nicira-ext.h"
 #include "openflow/openflow.h"
 #include "hash.h"
@@ -36,7 +37,7 @@ struct ofpbuf;
 /* This sequence number should be incremented whenever anything involving flows
  * or the wildcarding of flows changes.  This will cause build assertion
  * failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 20
+#define FLOW_WC_SEQ 21
 
 #define FLOW_N_REGS 8
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
@@ -98,7 +99,6 @@ struct flow {
     union flow_in_port in_port; /* Input port.*/
     uint32_t pkt_mark;          /* Packet mark. */
     ovs_be32 mpls_lse;          /* MPLS label stack entry. */
-    uint16_t mpls_depth;        /* Depth of MPLS stack. */
     ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
     ovs_be16 dl_type;           /* Ethernet frame type. */
     ovs_be16 tp_src;            /* TCP/UDP/SCTP source port. */
@@ -111,15 +111,14 @@ struct flow {
     uint8_t arp_tha[6];         /* ARP/ND target hardware address. */
     uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
     uint8_t nw_frag;            /* FLOW_FRAG_* flags. */
-    uint8_t zeros[6];
 };
 BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
 
 #define FLOW_U32S (sizeof(struct flow) / 4)
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 &&
-                  FLOW_WC_SEQ == 20);
+BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 152 &&
+                  FLOW_WC_SEQ == 21);
 
 /* Represents the metadata fields of struct flow. */
 struct flow_metadata {
@@ -291,7 +290,7 @@ bool flow_equal_except(const struct flow *a, const struct flow *b,
  *
  * The 'map' member holds one bit for each uint32_t in a "struct flow".  Each
  * 0-bit indicates that the corresponding uint32_t is zero, each 1-bit that it
- * is nonzero.
+ * *may* be nonzero.
  *
  * 'values' points to the start of an array that has one element for each 1-bit
  * in 'map'.  The least-numbered 1-bit is in values[0], the next 1-bit is in
@@ -309,9 +308,9 @@ bool flow_equal_except(const struct flow *a, const struct flow *b,
  *       that makes sense.  So far that's only proved useful for
  *       minimask_combine(), but the principle works elsewhere.
  *
- * The implementation maintains and depends on the invariant that every element
- * in 'values' is nonzero; that is, wherever a 1-bit appears in 'map', the
- * corresponding element of 'values' must be nonzero.
+ * Elements in 'values' are allowed to be zero.  This is useful for "struct
+ * minimatch", for which ensuring that the miniflow and minimask members have
+ * the same 'map' allows optimization.
  */
 struct miniflow {
     uint32_t *values;
@@ -320,6 +319,8 @@ struct miniflow {
 };
 
 void miniflow_init(struct miniflow *, const struct flow *);
+void miniflow_init_with_minimask(struct miniflow *, const struct flow *,
+                                 const struct minimask *);
 void miniflow_clone(struct miniflow *, const struct miniflow *);
 void miniflow_move(struct miniflow *dst, struct miniflow *);
 void miniflow_destroy(struct miniflow *);
@@ -328,6 +329,7 @@ void miniflow_expand(const struct miniflow *, struct flow *);
 
 uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs);
 uint16_t miniflow_get_vid(const struct miniflow *);
+static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
 
 bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
 bool miniflow_equal_in_minimask(const struct miniflow *a,
@@ -361,11 +363,36 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *);
 
 uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs);
 uint16_t minimask_get_vid_mask(const struct minimask *);
+static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
 
 bool minimask_equal(const struct minimask *a, const struct minimask *b);
 uint32_t minimask_hash(const struct minimask *, uint32_t basis);
 
 bool minimask_has_extra(const struct minimask *, const struct minimask *);
 bool minimask_is_catchall(const struct minimask *);
+\f
+/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */
+static inline ovs_be64
+miniflow_get_metadata(const struct miniflow *flow)
+{
+    enum { MD_OFS = offsetof(struct flow, metadata) };
+    BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0);
+    ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4);
+    ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1);
+
+    return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo));
+}
+
+/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
+ *
+ * The return value is all-1-bits if 'mask' matches on the whole value of the
+ * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field,
+ * or some other value if the metadata field is partially matched, partially
+ * wildcarded. */
+static inline ovs_be64
+minimask_get_metadata_mask(const struct minimask *mask)
+{
+    return miniflow_get_metadata(&mask->masks);
+}
 
 #endif /* flow.h */
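
The inline miniflow_get_metadata() above stitches the 64-bit OpenFlow metadata
field back together from the two 32-bit words a miniflow stores.  The check
below redoes that hi/lo combination in isolation; htonll() is modeled locally
(OVS gets it from the "byte-order.h" header that this hunk starts including),
and the metadata value is arbitrary.

    /* Standalone check of the hi/lo recombination in miniflow_get_metadata(). */
    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static uint64_t
    htonll_sketch(uint64_t x)
    {
        return htonl(1) == 1 ? x   /* Big-endian host: already network order. */
               : ((uint64_t) htonl(x & 0xffffffff) << 32) | htonl(x >> 32);
    }

    int
    main(void)
    {
        uint64_t metadata = UINT64_C(0x1122334455667788);  /* Host byte order. */
        uint64_t wire = htonll_sketch(metadata);   /* As kept in struct flow. */
        uint32_t hi, lo;

        /* A miniflow keeps 'wire' as two consecutive uint32_t words. */
        memcpy(&hi, (char *) &wire, 4);
        memcpy(&lo, (char *) &wire + 4, 4);

        /* Same expression as miniflow_get_metadata() above. */
        assert(htonll_sketch(((uint64_t) ntohl(hi) << 32) | ntohl(lo)) == wire);
        return 0;
    }
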
diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index e02f035..cef96a9 100644 (file)
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -59,22 +59,21 @@ static void jsonrpc_cleanup(struct jsonrpc *);
 static void jsonrpc_error(struct jsonrpc *, int error);
 
 /* This is just the same as stream_open() except that it uses the default
- * JSONRPC ports if none is specified. */
+ * JSONRPC port if none is specified. */
 int
 jsonrpc_stream_open(const char *name, struct stream **streamp, uint8_t dscp)
 {
-    return stream_open_with_default_ports(name, JSONRPC_TCP_PORT,
-                                          JSONRPC_SSL_PORT, streamp,
-                                          dscp);
+    return stream_open_with_default_port(name, OVSDB_OLD_PORT,
+                                         streamp, dscp);
 }
 
 /* This is just the same as pstream_open() except that it uses the default
- * JSONRPC ports if none is specified. */
+ * JSONRPC port if none is specified. */
 int
 jsonrpc_pstream_open(const char *name, struct pstream **pstreamp, uint8_t dscp)
 {
-    return pstream_open_with_default_ports(name, JSONRPC_TCP_PORT,
-                                           JSONRPC_SSL_PORT, pstreamp, dscp);
+    return pstream_open_with_default_port(name, OVSDB_OLD_PORT,
+                                          pstreamp, dscp);
 }
 
 /* Returns a new JSON-RPC stream that uses 'stream' for input and output.  The
diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h
index 0ae205d..5397200 100644 (file)
--- a/lib/jsonrpc.h
+++ b/lib/jsonrpc.h
@@ -34,10 +34,12 @@ struct stream;
 
 /* Default port numbers.
  *
- * There is nothing standard about these port numbers.  They are simply what
- * we have chosen. */
-#define JSONRPC_TCP_PORT 6632
-#define JSONRPC_SSL_PORT 6632
+ * OVSDB_OLD_PORT defines the original port number used by OVS.
+ * OVSDB_PORT defines the official port number assigned by IANA.  By
+ * default, we still use OVSDB_OLD_PORT, but we present a warning that
+ * this will change. */
+#define OVSDB_OLD_PORT 6632
+#define OVSDB_PORT 6640
 
 int jsonrpc_stream_open(const char *name, struct stream **, uint8_t dscp);
 int jsonrpc_pstream_open(const char *name, struct pstream **, uint8_t dscp);
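
For callers, the practical effect of the two constants above is only visible
through the default-port helpers in lib/stream.c.  A minimal sketch, assuming
the OVS headers are on the include path and skipping error handling:

    /* Connects to an OVSDB server, relying on the default-port fallback added
     * in this patch: "tcp:127.0.0.1" becomes "tcp:127.0.0.1:6632"
     * (OVSDB_OLD_PORT) and lib/stream.c logs a one-time warning that the
     * default will move to OVSDB_PORT (6640). */
    #include "jsonrpc.h"
    #include "stream.h"

    static int
    connect_default_ovsdb(struct stream **streamp)
    {
        return jsonrpc_stream_open("tcp:127.0.0.1", streamp, 0);
    }
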
diff --git a/lib/lockfile.c b/lib/lockfile.c
index 43e5592..d8f3952 100644 (file)
--- a/lib/lockfile.c
+++ b/lib/lockfile.c
@@ -35,7 +35,6 @@
 VLOG_DEFINE_THIS_MODULE(lockfile);
 
 COVERAGE_DEFINE(lockfile_lock);
-COVERAGE_DEFINE(lockfile_timeout);
 COVERAGE_DEFINE(lockfile_error);
 COVERAGE_DEFINE(lockfile_unlock);
 
diff --git a/lib/mac-learning.c b/lib/mac-learning.c
index 80dac69..fe06744 100644 (file)
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -29,9 +29,6 @@
 #include "unaligned.h"
 #include "util.h"
 #include "vlan-bitmap.h"
-#include "vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(mac_learning);
 
 COVERAGE_DEFINE(mac_learning_learned);
 COVERAGE_DEFINE(mac_learning_expired);
diff --git a/lib/match.c b/lib/match.c
index 03413fa..93f61f9 100644 (file)
--- a/lib/match.c
+++ b/lib/match.c
 #include "dynamic-string.h"
 #include "ofp-util.h"
 #include "packets.h"
-#include "vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(match);
-
 
 /* Converts the flow in 'flow' into a match in 'match', with the given
  * 'wildcards'. */
@@ -835,7 +831,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority)
 
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
 
     if (priority != OFP_DEFAULT_PRIORITY) {
         ds_put_format(s, "priority=%u,", priority);
@@ -1091,8 +1087,8 @@ match_print(const struct match *match)
 void
 minimatch_init(struct minimatch *dst, const struct match *src)
 {
-    miniflow_init(&dst->flow, &src->flow);
     minimask_init(&dst->mask, &src->wc);
+    miniflow_init_with_minimask(&dst->flow, &src->flow, &dst->mask);
 }
 
 /* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
@@ -1145,6 +1141,35 @@ minimatch_hash(const struct minimatch *match, uint32_t basis)
     return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
 }
 
+/* Returns true if 'target' satisfies 'match', that is, if each bit for which
+ * 'match' specifies a particular value has the correct value in 'target'.
+ *
+ * This function is equivalent to miniflow_equal_flow_in_minimask(&match->flow,
+ * target, &match->mask) but it is faster because of the invariant that
+ * match->flow.map and match->mask.map are the same. */
+bool
+minimatch_matches_flow(const struct minimatch *match,
+                       const struct flow *target)
+{
+    const uint32_t *target_u32 = (const uint32_t *) target;
+    const uint32_t *flowp = match->flow.values;
+    const uint32_t *maskp = match->mask.masks.values;
+    int i;
+
+    for (i = 0; i < MINI_N_MAPS; i++) {
+        uint32_t map;
+
+        for (map = match->flow.map[i]; map; map = zero_rightmost_1bit(map)) {
+            if ((*flowp++ ^ target_u32[raw_ctz(map)]) & *maskp++) {
+                return false;
+            }
+        }
+        target_u32 += 32;
+    }
+
+    return true;
+}
+
 /* Appends a string representation of 'match' to 's'.  If 'priority' is
  * different from OFP_DEFAULT_PRIORITY, includes it in 's'. */
 void
diff --git a/lib/match.h b/lib/match.h
index 7b104ee..48c8aa2 100644 (file)
--- a/lib/match.h
+++ b/lib/match.h
@@ -132,13 +132,15 @@ void match_print(const struct match *);
 
 /* A sparse representation of a "struct match".
  *
- * This has the same invariant as "struct match", that is, a 1-bit in the
- * 'flow' must correspond to a 1-bit in 'mask'.
+ * There are two invariants:
  *
- * The invariants for the underlying miniflow and minimask are also maintained,
- * which means that 'flow' and 'mask' can have different 'map's.  In
- * particular, if the match checks that a given 32-bit field has value 0, then
- * 'map' will have a 1-bit in 'mask' but a 0-bit in 'flow' for that field. */
+ *   - The same invariant as "struct match", that is, a 1-bit in the 'flow'
+ *     must correspond to a 1-bit in 'mask'.
+ *
+ *   - 'flow' and 'mask' have the same 'map'.  This implies that 'flow' and
+ *     'mask' have the same part of "struct flow" at the same offset into
+ *     'values', which makes minimatch_matches_flow() faster.
+ */
 struct minimatch {
     struct miniflow flow;
     struct minimask mask;
@@ -154,6 +156,8 @@ void minimatch_expand(const struct minimatch *, struct match *);
 bool minimatch_equal(const struct minimatch *a, const struct minimatch *b);
 uint32_t minimatch_hash(const struct minimatch *, uint32_t basis);
 
+bool minimatch_matches_flow(const struct minimatch *, const struct flow *);
+
 void minimatch_format(const struct minimatch *, struct ds *,
                       unsigned int priority);
 char *minimatch_to_string(const struct minimatch *, unsigned int priority);
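
The second invariant above is what lets minimatch_matches_flow() (added to
lib/match.c earlier in this patch) walk a single 'map' with one pair of value
pointers and no per-field offset bookkeeping.  Below is a standalone sketch of
that inner loop, using a made-up eight-word "flow" instead of struct flow and
__builtin_ctz() in place of OVS's raw_ctz():

    /* Sketch of the shared-map matching loop in minimatch_matches_flow(). */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool
    sparse_match(uint32_t map, const uint32_t *flow_vals,
                 const uint32_t *mask_vals, const uint32_t *target)
    {
        for (; map; map &= map - 1) {            /* Clear lowest set bit. */
            int ofs = __builtin_ctz(map);        /* Index of lowest set bit. */
            if ((*flow_vals++ ^ target[ofs]) & *mask_vals++) {
                return false;
            }
        }
        return true;
    }

    int
    main(void)
    {
        uint32_t target[8] = { 0, 0x11, 0, 0x2222, 0, 0, 0, 0x7777 };
        uint32_t map = (1u << 1) | (1u << 3) | (1u << 7);  /* Words 1, 3, 7. */
        uint32_t flow_vals[] = { 0x11, 0x2200, 0x7777 };
        uint32_t mask_vals[] = { 0xff, 0xff00, 0xffff };

        /* Word 3 differs only in its masked-out low byte, so this matches. */
        printf("%s\n", sparse_match(map, flow_vals, mask_vals, target)
                       ? "match" : "no match");
        return 0;
    }
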
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 23c05c5..3420503 100644 (file)
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -40,7 +40,6 @@ VLOG_DEFINE_THIS_MODULE(netlink_socket);
 COVERAGE_DEFINE(netlink_overflow);
 COVERAGE_DEFINE(netlink_received);
 COVERAGE_DEFINE(netlink_recv_jumbo);
-COVERAGE_DEFINE(netlink_send);
 COVERAGE_DEFINE(netlink_sent);
 
 /* Linux header file confusion causes this to be undefined. */
diff --git a/lib/netlink.c b/lib/netlink.c
index 50444ab..40477ea 100644 (file)
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -322,7 +322,7 @@ nl_msg_push_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size)
 {
     size_t total_size = NLA_HDRLEN + size;
     struct nlattr* nla = nl_msg_push_uninit(msg, total_size);
-    ovs_assert(NLA_ALIGN(total_size) <= UINT16_MAX);
+    ovs_assert(!nl_attr_oversized(size));
     nla->nla_len = total_size;
     nla->nla_type = type;
     return nla + 1;
@@ -468,6 +468,16 @@ nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg)
     msg->size = 0;
     return NULL;
 }
+
+/* Returns true if a Netlink attribute with a payload that is 'payload_size'
+ * bytes long would be oversized, that is, if it's not possible to create an
+ * nlattr of that size because its size wouldn't fit in the 16-bit nla_len
+ * field. */
+bool
+nl_attr_oversized(size_t payload_size)
+{
+    return NL_ATTR_SIZE(payload_size) > UINT16_MAX;
+}
 \f
 /* Attributes. */
 
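
The limit that nl_attr_oversized() above guards is just arithmetic on the
16-bit nla_len field.  The sketch below recomputes it locally; the 4-byte
header size and alignment mirror <linux/netlink.h>, and the helper is a local
stand-in rather than the OVS function.

    /* Standalone sketch of the size check behind nl_attr_oversized(). */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NLA_ALIGNTO   4
    #define NLA_ALIGN(n)  (((n) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
    #define NLA_HDRLEN    NLA_ALIGN(4)           /* sizeof(struct nlattr). */

    static bool
    attr_oversized(size_t payload_size)
    {
        /* Header plus padded payload must still fit in 16 bits. */
        return NLA_HDRLEN + NLA_ALIGN(payload_size) > UINT16_MAX;
    }

    int
    main(void)
    {
        printf("65536-byte payload oversized? %d\n", attr_oversized(65536));  /* 1 */
        printf("65528-byte payload oversized? %d\n", attr_oversized(65528));  /* 0 */
        return 0;
    }
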
diff --git a/lib/netlink.h b/lib/netlink.h
index afe2277..21d49d3 100644 (file)
--- a/lib/netlink.h
+++ b/lib/netlink.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -103,6 +103,8 @@ struct nlmsghdr *nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg);
 #define NL_A_BE32_SIZE NL_ATTR_SIZE(sizeof(ovs_be32))
 #define NL_A_BE64_SIZE NL_ATTR_SIZE(sizeof(ovs_be64))
 #define NL_A_FLAG_SIZE NL_ATTR_SIZE(0)
+
+bool nl_attr_oversized(size_t payload_size);
 \f
 /* Netlink attribute types. */
 enum nl_attr_type
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 2d7ee34..8444ab7 100644 (file)
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -570,7 +570,7 @@ nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match,
     int match_len;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
 
     /* Metadata. */
     if (match->wc.masks.in_port.ofp_port) {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index aec4196..5c7ccfb 100644 (file)
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -51,7 +51,8 @@ static const char *delimiters = ", \t\r\n";
 static int parse_odp_key_mask_attr(const char *, const struct simap *port_names,
                               struct ofpbuf *, struct ofpbuf *);
 static void format_odp_key_attr(const struct nlattr *a,
-                                const struct nlattr *ma, struct ds *ds,
+                                const struct nlattr *ma,
+                                const struct hmap *portno_names, struct ds *ds,
                                 bool verbose);
 
 /* Returns one the following for the action with the given OVS_ACTION_ATTR_*
@@ -401,7 +402,7 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
         break;
     case OVS_ACTION_ATTR_SET:
         ds_put_cstr(ds, "set(");
-        format_odp_key_attr(nl_attr_get(a), NULL, ds, true);
+        format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true);
         ds_put_cstr(ds, ")");
         break;
     case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -935,10 +936,49 @@ odp_mask_attr_is_exact(const struct nlattr *ma)
     return is_exact;
 }
 
+void
+odp_portno_names_set(struct hmap *portno_names, odp_port_t port_no,
+                     char *port_name)
+{
+    struct odp_portno_names *odp_portno_names;
+
+    odp_portno_names = xmalloc(sizeof *odp_portno_names);
+    odp_portno_names->port_no = port_no;
+    odp_portno_names->name = xstrdup(port_name);
+    hmap_insert(portno_names, &odp_portno_names->hmap_node,
+                hash_odp_port(port_no));
+}
+
+static char *
+odp_portno_names_get(const struct hmap *portno_names, odp_port_t port_no)
+{
+    struct odp_portno_names *odp_portno_names;
+
+    HMAP_FOR_EACH_IN_BUCKET (odp_portno_names, hmap_node,
+                             hash_odp_port(port_no), portno_names) {
+        if (odp_portno_names->port_no == port_no) {
+            return odp_portno_names->name;
+        }
+    }
+    return NULL;
+}
+
+void
+odp_portno_names_destroy(struct hmap *portno_names)
+{
+    struct odp_portno_names *odp_portno_names, *odp_portno_names_next;
+    HMAP_FOR_EACH_SAFE (odp_portno_names, odp_portno_names_next,
+                        hmap_node, portno_names) {
+        hmap_remove(portno_names, &odp_portno_names->hmap_node);
+        free(odp_portno_names->name);
+        free(odp_portno_names);
+    }
+}
 
 static void
 format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
-                    struct ds *ds, bool verbose)
+                    const struct hmap *portno_names, struct ds *ds,
+                    bool verbose)
 {
     struct flow_tnl tun_key;
     enum ovs_key_attr attr = nl_attr_type(a);
@@ -981,10 +1021,11 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
     case OVS_KEY_ATTR_ENCAP:
         if (ma && nl_attr_get_size(ma) && nl_attr_get_size(a)) {
             odp_flow_format(nl_attr_get(a), nl_attr_get_size(a),
-                            nl_attr_get(ma), nl_attr_get_size(ma), ds, verbose);
-        } else if (nl_attr_get_size(a)) {
-            odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, ds,
+                            nl_attr_get(ma), nl_attr_get_size(ma), NULL, ds,
                             verbose);
+        } else if (nl_attr_get_size(a)) {
+            odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, NULL,
+                            ds, verbose);
         }
         break;
 
@@ -1038,9 +1079,19 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
         break;
 
     case OVS_KEY_ATTR_IN_PORT:
-        ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a));
-        if (!is_exact) {
-            ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
+        if (portno_names && verbose && is_exact) {
+            char *name = odp_portno_names_get(portno_names,
+                            u32_to_odp(nl_attr_get_u32(a)));
+            if (name) {
+                ds_put_format(ds, "%s", name);
+            } else {
+                ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a));
+            }
+        } else {
+            ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a));
+            if (!is_exact) {
+                ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
+            }
         }
         break;
 
@@ -1364,7 +1415,7 @@ generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key)
 void
 odp_flow_format(const struct nlattr *key, size_t key_len,
                 const struct nlattr *mask, size_t mask_len,
-                struct ds *ds, bool verbose)
+                const struct hmap *portno_names, struct ds *ds, bool verbose)
 {
     if (key_len) {
         const struct nlattr *a;
@@ -1398,7 +1449,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len,
                 if (!first_field) {
                     ds_put_char(ds, ',');
                 }
-                format_odp_key_attr(a, ma, ds, verbose);
+                format_odp_key_attr(a, ma, portno_names, ds, verbose);
                 first_field = false;
             }
             ofpbuf_clear(&ofp);
@@ -1435,7 +1486,7 @@ void
 odp_flow_key_format(const struct nlattr *key,
                     size_t key_len, struct ds *ds)
 {
-    odp_flow_format(key, key_len, NULL, 0, ds, true);
+    odp_flow_format(key, key_len, NULL, 0, NULL, ds, true);
 }
 
 static void
@@ -2503,9 +2554,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
         arp_key->arp_op = htons(data->nw_proto);
         memcpy(arp_key->arp_sha, data->arp_sha, ETH_ADDR_LEN);
         memcpy(arp_key->arp_tha, data->arp_tha, ETH_ADDR_LEN);
-    }
-
-    if (flow->mpls_depth) {
+    } else if (eth_type_mpls(flow->dl_type)) {
         struct ovs_key_mpls *mpls_key;
 
         mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS,
@@ -2798,7 +2847,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                return ODP_FIT_TOO_LITTLE;
            }
            flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]);
-           flow->mpls_depth++;
         } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS)) {
             flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]);
 
@@ -2806,10 +2854,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                 return ODP_FIT_ERROR;
             }
             expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_MPLS);
-            if (flow->mpls_lse) {
-                /* XXX Is this needed? */
-                flow->mpls_depth = 0xffff;
-            }
         }
         goto done;
     } else if (src_flow->dl_type == htons(ETH_TYPE_IP)) {
@@ -3325,10 +3369,10 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base,
 }
 
 static void
-commit_vlan_action(const struct flow *flow, struct flow *base,
+commit_vlan_action(ovs_be16 vlan_tci, struct flow *base,
                    struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
-    if (base->vlan_tci == flow->vlan_tci) {
+    if (base->vlan_tci == vlan_tci) {
         return;
     }
 
@@ -3338,61 +3382,57 @@ commit_vlan_action(const struct flow *flow, struct flow *base,
         nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN);
     }
 
-    if (flow->vlan_tci & htons(VLAN_CFI)) {
+    if (vlan_tci & htons(VLAN_CFI)) {
         struct ovs_action_push_vlan vlan;
 
         vlan.vlan_tpid = htons(ETH_TYPE_VLAN);
-        vlan.vlan_tci = flow->vlan_tci;
+        vlan.vlan_tci = vlan_tci;
         nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN,
                           &vlan, sizeof vlan);
     }
-    base->vlan_tci = flow->vlan_tci;
+    base->vlan_tci = vlan_tci;
 }
 
 static void
 commit_mpls_action(const struct flow *flow, struct flow *base,
-                   struct ofpbuf *odp_actions, struct flow_wildcards *wc)
+                   struct ofpbuf *odp_actions, struct flow_wildcards *wc,
+                   int *mpls_depth_delta)
 {
-    if (flow->mpls_lse == base->mpls_lse &&
-        flow->mpls_depth == base->mpls_depth) {
+    if (flow->mpls_lse == base->mpls_lse && !*mpls_depth_delta) {
         return;
     }
 
     memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
 
-    if (flow->mpls_depth < base->mpls_depth) {
-        if (base->mpls_depth - flow->mpls_depth > 1) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
-            VLOG_WARN_RL(&rl, "Multiple mpls_pop actions reduced to "
-                         " a single mpls_pop action");
-        }
-
+    switch (*mpls_depth_delta) {
+    case -1:
         nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS, flow->dl_type);
-    } else if (flow->mpls_depth > base->mpls_depth) {
+        break;
+    case 1: {
         struct ovs_action_push_mpls *mpls;
 
-        if (flow->mpls_depth - base->mpls_depth > 1) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
-            VLOG_WARN_RL(&rl, "Multiple mpls_push actions reduced to "
-                         " a single mpls_push action");
-        }
-
         mpls = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_PUSH_MPLS,
                                         sizeof *mpls);
         memset(mpls, 0, sizeof *mpls);
         mpls->mpls_ethertype = flow->dl_type;
         mpls->mpls_lse = flow->mpls_lse;
-    } else {
+        break;
+    }
+    case 0: {
         struct ovs_key_mpls mpls_key;
 
         mpls_key.mpls_lse = flow->mpls_lse;
         commit_set_action(odp_actions, OVS_KEY_ATTR_MPLS,
                           &mpls_key, sizeof(mpls_key));
+        break;
+    }
+    default:
+        NOT_REACHED();
     }
 
     base->dl_type = flow->dl_type;
     base->mpls_lse = flow->mpls_lse;
-    base->mpls_depth = flow->mpls_depth;
+    *mpls_depth_delta = 0;
 }
 
 static void
@@ -3563,17 +3603,18 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base,
  * used as part of the action. */
 void
 commit_odp_actions(const struct flow *flow, struct flow *base,
-                   struct ofpbuf *odp_actions, struct flow_wildcards *wc)
+                   struct ofpbuf *odp_actions, struct flow_wildcards *wc,
+                   int *mpls_depth_delta)
 {
     commit_set_ether_addr_action(flow, base, odp_actions, wc);
-    commit_vlan_action(flow, base, odp_actions, wc);
+    commit_vlan_action(flow->vlan_tci, base, odp_actions, wc);
     commit_set_nw_action(flow, base, odp_actions, wc);
     commit_set_port_action(flow, base, odp_actions, wc);
     /* Committing MPLS actions should occur after committing nw and port
      * actions. This is because committing MPLS actions may alter a packet so
      * that it is no longer IP and thus nw and port actions are no longer valid.
      */
-    commit_mpls_action(flow, base, odp_actions, wc);
+    commit_mpls_action(flow, base, odp_actions, wc, mpls_depth_delta);
     commit_set_priority_action(flow, base, odp_actions, wc);
     commit_set_pkt_mark_action(flow, base, odp_actions, wc);
 }
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 192cfa0..2712cb0 100644 (file)
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -23,6 +23,7 @@
 #include <string.h>
 #include <linux/openvswitch.h>
 #include "hash.h"
+#include "hmap.h"
 #include "openflow/openflow.h"
 #include "util.h"
 
@@ -42,6 +43,16 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions,
 int odp_actions_from_string(const char *, const struct simap *port_names,
                             struct ofpbuf *odp_actions);
 
+/* A map from odp port number to its name. */
+struct odp_portno_names {
+    struct hmap_node hmap_node; /* A node in a port number to name hmap. */
+    odp_port_t port_no;         /* Port number in the datapath. */
+    char *name;                 /* Name associated with the above 'port_no'. */
+};
+
+void odp_portno_names_set(struct hmap *portno_names, odp_port_t port_no,
+                          char *port_name);
+void odp_portno_names_destroy(struct hmap *portno_names);
 /* The maximum number of bytes that odp_flow_key_from_flow() appends to a
  * buffer.  This is the upper bound on the length of a nlattr-formatted flow
  * key that ovs-vswitchd fully understands.
@@ -94,7 +105,8 @@ enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *,
 
 void odp_flow_format(const struct nlattr *key, size_t key_len,
                      const struct nlattr *mask, size_t mask_len,
-                     struct ds *, bool verbose);
+                     const struct hmap *portno_names, struct ds *,
+                     bool verbose);
 void odp_flow_key_format(const struct nlattr *, size_t, struct ds *);
 int odp_flow_from_string(const char *s,
                          const struct simap *port_names,
@@ -130,8 +142,8 @@ const char *odp_key_fitness_to_string(enum odp_key_fitness);
 void commit_odp_tunnel_action(const struct flow *, struct flow *base,
                               struct ofpbuf *odp_actions);
 void commit_odp_actions(const struct flow *, struct flow *base,
-                        struct ofpbuf *odp_actions,
-                        struct flow_wildcards *wc);
+                        struct ofpbuf *odp_actions, struct flow_wildcards *wc,
+                        int *mpls_depth_delta);
 \f
 /* ofproto-dpif interface.
  *
diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index dcc82db..65430f3 100644 (file)
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -884,14 +884,14 @@ ofpacts_from_openflow11(const union ofp_action *in, size_t n_in,
 /* OpenFlow 1.1 instructions. */
 
 #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME)             \
-    static inline const struct STRUCT *                         \
+    static inline const struct STRUCT * OVS_UNUSED              \
     instruction_get_##ENUM(const struct ofp11_instruction *inst)\
     {                                                           \
         ovs_assert(inst->type == htons(ENUM));                  \
         return ALIGNED_CAST(struct STRUCT *, inst);             \
     }                                                           \
                                                                 \
-    static inline void                                          \
+    static inline void OVS_UNUSED                               \
     instruction_init_##ENUM(struct STRUCT *s)                   \
     {                                                           \
         memset(s, 0, sizeof *s);                                \
@@ -899,7 +899,7 @@ ofpacts_from_openflow11(const union ofp_action *in, size_t n_in,
         s->len = htons(sizeof *s);                              \
     }                                                           \
                                                                 \
-    static inline struct STRUCT *                               \
+    static inline struct STRUCT * OVS_UNUSED                    \
     instruction_put_##ENUM(struct ofpbuf *buf)                  \
     {                                                           \
         struct STRUCT *s = ofpbuf_put_uninit(buf, sizeof *s);   \
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 522bd95..7ca7305 100644 (file)
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -38,9 +38,6 @@
 #include "packets.h"
 #include "socket-util.h"
 #include "vconn.h"
-#include "vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(ofp_parse);
 
 /* Parses 'str' as an 8-bit unsigned integer into '*valuep'.
  *
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 6a2bf5b..173b534 100644 (file)
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -84,7 +84,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
 void
 ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
 
     /* Initialize most of wc. */
     flow_wildcards_init_catchall(wc);
@@ -4905,7 +4905,6 @@ ofputil_normalize_match__(struct match *match, bool may_log)
     }
     if (!(may_match & MAY_MPLS)) {
         wc.masks.mpls_lse = htonl(0);
-        wc.masks.mpls_depth = 0;
     }
 
     /* Log any changes. */
diff --git a/lib/ofp-version-opt.c b/lib/ofp-version-opt.c
index 84e83d8..10784fc 100644 (file)
--- a/lib/ofp-version-opt.c
+++ b/lib/ofp-version-opt.c
@@ -1,11 +1,8 @@
 #include <config.h>
+#include "dynamic-string.h"
 #include "ofp-util.h"
 #include "ofp-version-opt.h"
 #include "ovs-thread.h"
-#include "vlog.h"
-#include "dynamic-string.h"
-
-VLOG_DEFINE_THIS_MODULE(ofp_version);
 
 static uint32_t allowed_versions = 0;
 
diff --git a/lib/process.c b/lib/process.c
index 143347c..5dd34b3 100644 (file)
--- a/lib/process.c
+++ b/lib/process.c
@@ -37,7 +37,6 @@
 
 VLOG_DEFINE_THIS_MODULE(process);
 
-COVERAGE_DEFINE(process_sigchld);
 COVERAGE_DEFINE(process_start);
 
 struct process {
diff --git a/lib/signals.c b/lib/signals.c
index f11ed05..27da5d6 100644 (file)
--- a/lib/signals.c
+++ b/lib/signals.c
@@ -110,7 +110,7 @@ const char *
 signal_name(int signum, char *namebuf, size_t bufsize)
 {
 #if HAVE_DECL_SYS_SIGLIST
-    if (signum >= 0 && signum < ARRAY_SIZE(sys_siglist)) {
+    if (signum >= 0 && signum < N_SIGNALS) {
         const char *name = sys_siglist[signum];
         if (name) {
             return name;
diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
index 3b9270f..1e748c0 100644 (file)
--- a/lib/stream-ssl.c
+++ b/lib/stream-ssl.c
@@ -317,7 +317,7 @@ ssl_open(const char *name, char *suffix, struct stream **streamp, uint8_t dscp)
         return error;
     }
 
-    error = inet_open_active(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, &fd,
+    error = inet_open_active(SOCK_STREAM, suffix, OFP_OLD_PORT, &sin, &fd,
                              dscp);
     if (fd >= 0) {
         int state = error ? STATE_TCP_CONNECTING : STATE_SSL_CONNECTING;
@@ -797,7 +797,7 @@ pssl_open(const char *name OVS_UNUSED, char *suffix, struct pstream **pstreamp,
         return retval;
     }
 
-    fd = inet_open_passive(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, dscp);
+    fd = inet_open_passive(SOCK_STREAM, suffix, OFP_OLD_PORT, &sin, dscp);
     if (fd < 0) {
         return -fd;
     }
@@ -846,7 +846,7 @@ pssl_accept(struct pstream *pstream, struct stream **new_streamp)
     }
 
     sprintf(name, "ssl:"IP_FMT, IP_ARGS(sin.sin_addr.s_addr));
-    if (sin.sin_port != htons(OFP_SSL_PORT)) {
+    if (sin.sin_port != htons(OFP_OLD_PORT)) {
         sprintf(strchr(name, '\0'), ":%"PRIu16, ntohs(sin.sin_port));
     }
     return new_ssl_stream(name, new_fd, SERVER, STATE_SSL_CONNECTING, &sin,
diff --git a/lib/stream.c b/lib/stream.c
index da089ae..0442d84 100644 (file)
--- a/lib/stream.c
+++ b/lib/stream.c
@@ -26,6 +26,7 @@
 #include "dynamic-string.h"
 #include "fatal-signal.h"
 #include "flow.h"
+#include "jsonrpc.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
 #include "openflow/nicira-ext.h"
@@ -717,23 +718,29 @@ count_fields(const char *s_)
     return n;
 }
 
-/* Like stream_open(), but for tcp streams the port defaults to
- * 'default_tcp_port' if no port number is given and for SSL streams the port
- * defaults to 'default_ssl_port' if no port number is given. */
+/* Like stream_open(), but the port defaults to 'default_port' if no port
+ * number is given. */
 int
-stream_open_with_default_ports(const char *name_,
-                               uint16_t default_tcp_port,
-                               uint16_t default_ssl_port,
-                               struct stream **streamp,
-                               uint8_t dscp)
+stream_open_with_default_port(const char *name_,
+                              uint16_t default_port,
+                              struct stream **streamp,
+                              uint8_t dscp)
 {
     char *name;
     int error;
 
-    if (!strncmp(name_, "tcp:", 4) && count_fields(name_) < 3) {
-        name = xasprintf("%s:%d", name_, default_tcp_port);
-    } else if (!strncmp(name_, "ssl:", 4) && count_fields(name_) < 3) {
-        name = xasprintf("%s:%d", name_, default_ssl_port);
+    if ((!strncmp(name_, "tcp:", 4) || !strncmp(name_, "ssl:", 4))
+        && count_fields(name_) < 3) {
+        if (default_port == OFP_OLD_PORT) {
+            VLOG_WARN_ONCE("The default OpenFlow port number will change "
+                           "from %d to %d in a future release",
+                           OFP_OLD_PORT, OFP_PORT);
+        } else if (default_port == OVSDB_OLD_PORT) {
+            VLOG_WARN_ONCE("The default OVSDB port number will change "
+                           "from %d to %d in a future release",
+                           OVSDB_OLD_PORT, OVSDB_PORT);
+        }
+        name = xasprintf("%s:%d", name_, default_port);
     } else {
         name = xstrdup(name_);
     }
@@ -743,23 +750,20 @@ stream_open_with_default_ports(const char *name_,
     return error;
 }
 
-/* Like pstream_open(), but for ptcp streams the port defaults to
- * 'default_ptcp_port' if no port number is given and for passive SSL streams
- * the port defaults to 'default_pssl_port' if no port number is given. */
+/* Like pstream_open(), but the port defaults to 'default_port' if no port
+ * number is given. */
 int
-pstream_open_with_default_ports(const char *name_,
-                                uint16_t default_ptcp_port,
-                                uint16_t default_pssl_port,
-                                struct pstream **pstreamp,
-                                uint8_t dscp)
+pstream_open_with_default_port(const char *name_,
+                               uint16_t default_port,
+                               struct pstream **pstreamp,
+                               uint8_t dscp)
 {
     char *name;
     int error;
 
-    if (!strncmp(name_, "ptcp:", 5) && count_fields(name_) < 2) {
-        name = xasprintf("%s%d", name_, default_ptcp_port);
-    } else if (!strncmp(name_, "pssl:", 5) && count_fields(name_) < 2) {
-        name = xasprintf("%s%d", name_, default_pssl_port);
+    if ((!strncmp(name_, "ptcp:", 5) || !strncmp(name_, "pssl:", 5))
+        && count_fields(name_) < 2) {
+        name = xasprintf("%s%d", name_, default_port);
     } else {
         name = xstrdup(name_);
     }
@@ -778,15 +782,12 @@ pstream_open_with_default_ports(const char *name_,
  *     - On error, function returns false and *sin contains garbage.
  */
 bool
-stream_parse_target_with_default_ports(const char *target,
-                                       uint16_t default_tcp_port,
-                                       uint16_t default_ssl_port,
-                                       struct sockaddr_in *sin)
-{
-    return (!strncmp(target, "tcp:", 4)
-             && inet_parse_active(target + 4, default_tcp_port, sin)) ||
-            (!strncmp(target, "ssl:", 4)
-             && inet_parse_active(target + 4, default_ssl_port, sin));
+stream_parse_target_with_default_port(const char *target,
+                                      uint16_t default_port,
+                                      struct sockaddr_in *sin)
+{
+    return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4))
+             && inet_parse_active(target + 4, default_port, sin));
 }
 
 /* Attempts to guess the content type of a stream whose first few bytes were
diff --git a/lib/stream.h b/lib/stream.h
index aa3fa9d..d966cde 100644 (file)
--- a/lib/stream.h
+++ b/lib/stream.h
@@ -71,19 +71,16 @@ ovs_be16 pstream_get_bound_port(const struct pstream *);
 \f
 /* Convenience functions. */
 
-int stream_open_with_default_ports(const char *name,
-                                   uint16_t default_tcp_port,
-                                   uint16_t default_ssl_port,
-                                   struct stream **,
+int stream_open_with_default_port(const char *name,
+                                  uint16_t default_port,
+                                  struct stream **,
+                                  uint8_t dscp);
+int pstream_open_with_default_port(const char *name,
+                                   uint16_t default_port,
+                                   struct pstream **,
                                    uint8_t dscp);
-int pstream_open_with_default_ports(const char *name,
-                                    uint16_t default_ptcp_port,
-                                    uint16_t default_pssl_port,
-                                    struct pstream **,
-                                    uint8_t dscp);
-bool stream_parse_target_with_default_ports(const char *target,
-                                           uint16_t default_tcp_port,
-                                           uint16_t default_ssl_port,
+bool stream_parse_target_with_default_port(const char *target,
+                                           uint16_t default_port,
                                            struct sockaddr_in *sin);
 int stream_or_pstream_needs_probes(const char *name);
 
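
The renamed helpers above now take a single default port, with the warning
about the upcoming IANA-assigned ports living in lib/stream.c.  A minimal
sketch of the parsing helper, assuming OFP_OLD_PORT comes from the OpenFlow
headers touched elsewhere in this patch:

    /* Parses an active OpenFlow target, filling in the default port when the
     * target is just "tcp:IP" or "ssl:IP". */
    #include <netinet/in.h>
    #include <stdbool.h>
    #include "openflow/openflow.h"
    #include "stream.h"

    static bool
    parse_controller_target(const char *target, struct sockaddr_in *sin)
    {
        return stream_parse_target_with_default_port(target, OFP_OLD_PORT, sin);
    }
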
diff --git a/lib/tag.c b/lib/tag.c
new file mode 100644 (file)
index 0000000..13d1829
--- /dev/null
+++ b/lib/tag.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "tag.h"
+
+#define LOG2_N_TAG_BITS (N_TAG_BITS == 32 ? 5 : N_TAG_BITS == 64 ? 6 : 0)
+BUILD_ASSERT_DECL(LOG2_N_TAG_BITS > 0);
+
+/* Returns a tag deterministically generated from 'seed'.
+ *
+ * 'seed' should have data in all of its bits; if it has data only in its
+ * low-order bits then the resulting tags will be poorly distributed.  Use a
+ * hash function such as hash_bytes() to generate 'seed' if necessary. */
+tag_type
+tag_create_deterministic(uint32_t seed)
+{
+    int x = seed & (N_TAG_BITS - 1);
+    int y = (seed >> LOG2_N_TAG_BITS) % (N_TAG_BITS - 1);
+    y += y >= x;
+    return (1u << x) | (1u << y);
+}
+
+/* Initializes 'tracker'. */
+void
+tag_tracker_init(struct tag_tracker *tracker)
+{
+    memset(tracker, 0, sizeof *tracker);
+}
+
+/* Adds 'add' to '*tags' and records the bits added in 'tracker'. */
+void
+tag_tracker_add(struct tag_tracker *tracker, tag_type *tags, tag_type add)
+{
+    *tags |= add;
+    for (; add; add = zero_rightmost_1bit(add)) {
+        tracker->counts[rightmost_1bit_idx(add)]++;
+    }
+}
+
+/* Removes 'sub' from 'tracker' and unsets any bits in '*tags' that no
+ * remaining tag includes. */
+void
+tag_tracker_subtract(struct tag_tracker *tracker, tag_type *tags, tag_type sub)
+{
+    for (; sub; sub = zero_rightmost_1bit(sub)) {
+        if (!--tracker->counts[rightmost_1bit_idx(sub)]) {
+            *tags &= ~rightmost_1bit(sub);
+        }
+    }
+}
diff --git a/lib/tag.h b/lib/tag.h
new file mode 100644 (file)
index 0000000..c99fd09
--- /dev/null
+++ b/lib/tag.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TAG_H
+#define TAG_H 1
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <limits.h>
+#include "util.h"
+
+/*
+ * Tagging support.
+ *
+ * A 'tag' represents an arbitrary category.  Currently, tags are used to
+ * represent categories of flows and in particular the value of the 64-bit
+ * "metadata" field in the flow.  The universe of possible categories is very
+ * large (2**64).  The number of categories in use at a given time can also be
+ * large.  This means that keeping track of category membership via
+ * conventional means (lists, bitmaps, etc.) is likely to be expensive.
+ *
+ * Tags are actually implemented via a "superimposed coding", as discussed in
+ * Knuth TAOCP v.3 section 6.5 "Retrieval on Secondary Keys".  A tag is an
+ * unsigned integer in which exactly 2 bits are set to 1 and the rest set to 0.
+ * For 32-bit integers (as currently used) there are 32 * 31 / 2 = 496 unique
+ * tags; for 64-bit integers there are 64 * 63 / 2 = 2,016.
+ *
+ * Because there is a small finite number of unique tags, tags must collide
+ * after some number of them have been created.  In practice we generally
+ * create tags by choosing bits randomly or based on a hash function.
+ *
+ * The key property of tags is that we can combine them without increasing the
+ * amount of data required using bitwise-OR, since the result has the 1-bits
+ * from both tags set.  The necessary tradeoff is that the result is even more
+ * ambiguous: if combining two tags yields a value with 4 bits set to 1, then
+ * the result value will test as having 4 * 3 / 2 = 6 unique tags, not just the
+ * two tags that we combined.
+ *
+ * The upshot is this: a value that is the bitwise-OR combination of a number
+ * of tags will always include the tags that were combined, but it may contain
+ * any number of additional tags as well.  This is acceptable for our use,
+ * since we want to be sure that we check every classifier table that contains
+ * a rule with a given metadata value, but it is OK if we check a few extra
+ * tables as well.
+ *
+ * If we combine too many tags, then the result will have every bit set, so
+ * that it will test as including every tag.  This can happen, but we hope that
+ * this is not the common case.
+ */
+
+/* Represents a tag, or the combination of 0 or more tags. */
+typedef uint32_t tag_type;
+
+#define N_TAG_BITS (CHAR_BIT * sizeof(tag_type))
+BUILD_ASSERT_DECL(IS_POW2(N_TAG_BITS));
+
+/* A 'tag_type' value that intersects every tag. */
+#define TAG_ALL UINT32_MAX
+
+/* An arbitrary tag. */
+#define TAG_ARBITRARY UINT32_C(3)
+
+tag_type tag_create_deterministic(uint32_t seed);
+static inline bool tag_intersects(tag_type, tag_type);
+
+/* Returns true if 'a' and 'b' have at least one tag in common,
+ * false if their set of tags is disjoint. */
+static inline bool
+tag_intersects(tag_type a, tag_type b)
+{
+    tag_type x = a & b;
+    return (x & (x - 1)) != 0;
+}
+\f
+/* Adding tags is easy, but subtracting is hard because you can't tell whether
+ * a bit was set only by the tag you're removing or by multiple tags.  The
+ * tag_tracker data structure counts the number of tags that set each bit,
+ * which allows for efficient subtraction. */
+struct tag_tracker {
+    unsigned int counts[N_TAG_BITS];
+};
+
+void tag_tracker_init(struct tag_tracker *);
+void tag_tracker_add(struct tag_tracker *, tag_type *, tag_type);
+void tag_tracker_subtract(struct tag_tracker *, tag_type *, tag_type);
+
+#endif /* tag.h */
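
As an aside on the scheme above: a short stand-alone sketch (not part of the
patch; the bit positions and the printf checks are illustrative assumptions)
showing how two tags combine with bitwise-OR, and the kind of false positive
the header comment warns about.

    /* Sketch assuming the tag_type and tag_intersects() definitions above. */
    #include <stdio.h>

    int
    main(void)
    {
        tag_type a = (1u << 3) | (1u << 17);    /* One tag: bits 3 and 17. */
        tag_type b = (1u << 5) | (1u << 28);    /* Another tag: bits 5 and 28. */
        tag_type set = a | b;                   /* Combined set of tags. */
        tag_type ghost = (1u << 3) | (1u << 5); /* Never added to 'set'. */

        printf("%d\n", tag_intersects(set, a));     /* 1: 'a' is in the set. */
        printf("%d\n", tag_intersects(set, b));     /* 1: 'b' is in the set. */

        /* 'ghost' borrows one bit from 'a' and one from 'b', so it also tests
         * as intersecting 'set': a false positive, as described above. */
        printf("%d\n", tag_intersects(set, ghost)); /* 1, spuriously. */
        return 0;
    }
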
index 223ed30..64ae845 100644 (file)
@@ -234,6 +234,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
         log_poll_interval(*last_wakeup);
     }
     coverage_clear();
+    coverage_run();
     start = time_msec();
 
     timeout_when = MIN(timeout_when, deadline);
index 0db41be..a899065 100644 (file)
@@ -87,8 +87,23 @@ void ovs_assert_failure(const char *, const char *, const char *) NO_RETURN;
 
 extern const char *program_name;
 
+#define __ARRAY_SIZE_NOCHECK(ARRAY) (sizeof(ARRAY) / sizeof((ARRAY)[0]))
+#ifdef __GNUC__
+/* Evaluates to 1 if ARRAY has an array type, 0 if it is a pointer. */
+#define __ARRAY_CHECK(ARRAY)                                   \
+    !__builtin_types_compatible_p(typeof(ARRAY), typeof(&ARRAY[0]))
+
+/* compile-time fail if not array */
+#define __ARRAY_FAIL(ARRAY) (sizeof(char[-2*!__ARRAY_CHECK(ARRAY)]))
+#define __ARRAY_SIZE(ARRAY)                                    \
+    __builtin_choose_expr(__ARRAY_CHECK(ARRAY),                        \
+        __ARRAY_SIZE_NOCHECK(ARRAY), __ARRAY_FAIL(ARRAY))
+#else
+#define __ARRAY_SIZE(ARRAY) __ARRAY_SIZE_NOCHECK(ARRAY)
+#endif
+
 /* Returns the number of elements in ARRAY. */
-#define ARRAY_SIZE(ARRAY) (sizeof ARRAY / sizeof *ARRAY)
+#define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY)
 
 /* Returns X / Y, rounding up.  X must be nonnegative to round correctly. */
 #define DIV_ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y))
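
For illustration, a small sketch (not part of the patch) of what the stricter
macro buys under GCC: ARRAY_SIZE() still works on real arrays but now fails at
compile time when handed a pointer, instead of silently computing a bogus size.

    /* Sketch assuming the ARRAY_SIZE() definition above. */
    #include <stddef.h>

    static int primes[] = { 2, 3, 5, 7, 11 };

    static size_t
    count_primes(const int *p)
    {
        size_t n = ARRAY_SIZE(primes);  /* OK: evaluates to 5. */

        /* return ARRAY_SIZE(p); */     /* With GCC this no longer compiles:
                                         * __ARRAY_FAIL() rejects pointer
                                         * arguments instead of quietly
                                         * dividing sizeof(int *) by
                                         * sizeof(int). */
        (void) p;
        return n;
    }
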
index be96ca8..bf7aaf7 100644 (file)
@@ -1,13 +1,13 @@
 .IP "\fBssl:\fIip\fR[\fB:\fIport\fR]"
-The specified SSL \fIport\fR (default: 6633) on the host at the given
-\fIip\fR, which must be expressed as an IP address (not a DNS name).
-The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and
-\fB\-\-ca\-cert\fR options are mandatory when this form is used.
-.
-.IP "\fBtcp:\fIip\fR[\fB:\fIport\fR]"
-The specified TCP \fIport\fR (default: 6633) on the host at the given
-\fIip\fR, which must be expressed as an IP address (not a DNS name).
-.
+.IQ "\fBtcp:\fIip\fR[\fB:\fIport\fR]"
+The specified \fIport\fR on the host at the given \fIip\fR, which must
+be expressed as an IP address (not a DNS name).  For \fBssl\fR, the
+\fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR
+options are mandatory.
+.IP
+If \fIport\fR is not specified, it currently defaults to 6633.  In the
+future, the default will change to 6653, which is the IANA-defined
+value.
 .TP
 \fBunix:\fIfile\fR
 The Unix domain server socket named \fIfile\fR.
index 1edd118..a9efdb3 100644 (file)
@@ -1,16 +1,14 @@
 .IP "\fBpssl:\fR[\fIport\fR][\fB:\fIip\fR]"
-Listens for OpenFlow SSL connections on \fIport\fR (default: 6633).
-The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and
-\fB\-\-ca\-cert\fR options are mandatory when this form is used.  By
-default, connections are not bound to a particular local IP address,
-but \fIip\fR may be specified to listen only for connections to the
-given \fIip\fR.
-.
-.IP "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]"
-Listens for OpenFlow TCP connections on \fIport\fR (default: 6633).
-By default, connections are not bound to a particular local IP
-address, but \fIip\fR may be specified to listen only for connections
-to the given \fIip\fR.
+.IQ "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]"
+Listens for OpenFlow connections on \fIport\fR.  By
+default, connections are not bound to a particular local IP address, but
+\fIip\fR may be specified to listen only for connections to the given
+\fIip\fR.  For \fBpssl\fR, the \fB\-\-private\-key\fR,
+\fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory.
+.IP
+If \fIport\fR is not specified, it currently defaults to 6633.  In the
+future, the default will change to 6653, which is the IANA-defined
+value.
 .
 .IP "\fBpunix:\fIfile\fR"
 Listens for OpenFlow connections on the Unix domain server socket
index 92076d9..027f48f 100644 (file)
@@ -82,8 +82,7 @@ vconn_stream_open(const char *name, uint32_t allowed_versions,
     struct stream *stream;
     int error;
 
-    error = stream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT,
-                                           &stream, dscp);
+    error = stream_open_with_default_port(name, OFP_OLD_PORT, &stream, dscp);
     if (!error) {
         error = stream_connect(stream);
         if (!error || error == EAGAIN) {
@@ -316,8 +315,8 @@ pvconn_pstream_listen(const char *name, uint32_t allowed_versions,
     struct pstream *pstream;
     int error;
 
-    error = pstream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT,
-                                            &pstream, dscp);
+    error = pstream_open_with_default_port(name, OFP_OLD_PORT,
+                                           &pstream, dscp);
     if (error) {
         return error;
     }
index 15ac119..5708987 100644 (file)
@@ -138,10 +138,10 @@ vconn_usage(bool active, bool passive, bool bootstrap OVS_UNUSED)
     if (active) {
         printf("Active OpenFlow connection methods:\n");
         printf("  tcp:IP[:PORT]           "
-               "PORT (default: %d) at remote IP\n", OFP_TCP_PORT);
+               "PORT (default: %d) at remote IP\n", OFP_OLD_PORT);
 #ifdef HAVE_OPENSSL
         printf("  ssl:IP[:PORT]           "
-               "SSL PORT (default: %d) at remote IP\n", OFP_SSL_PORT);
+               "SSL PORT (default: %d) at remote IP\n", OFP_OLD_PORT);
 #endif
         printf("  unix:FILE               Unix domain socket named FILE\n");
     }
@@ -150,11 +150,11 @@ vconn_usage(bool active, bool passive, bool bootstrap OVS_UNUSED)
         printf("Passive OpenFlow connection methods:\n");
         printf("  ptcp:[PORT][:IP]        "
                "listen to TCP PORT (default: %d) on IP\n",
-               OFP_TCP_PORT);
+               OFP_OLD_PORT);
 #ifdef HAVE_OPENSSL
         printf("  pssl:[PORT][:IP]        "
                "listen for SSL on PORT (default: %d) on IP\n",
-               OFP_SSL_PORT);
+               OFP_OLD_PORT);
 #endif
         printf("  punix:FILE              "
                "listen on Unix domain socket FILE\n");
index 282d28c..6531bf0 100644 (file)
@@ -303,7 +303,7 @@ vlandev_stub_del(const char *vlan_dev OVS_UNUSED)
     return EOPNOTSUPP;
 }
 
-static const struct vlandev_class vlandev_stub_class = {
+static const struct vlandev_class OVS_UNUSED vlandev_stub_class = {
     NULL,                       /* vd_refresh */
     vlandev_stub_add,
     vlandev_stub_del
index 37806b8..b1ca158 100644 (file)
@@ -42,8 +42,6 @@
 
 VLOG_DEFINE_THIS_MODULE(vlog);
 
-COVERAGE_DEFINE(vlog_recursive);
-
 /* ovs_assert() logs the assertion message, so using ovs_assert() in this
  * source file could cause recursion. */
 #undef ovs_assert
index 811d2f9..2a34f04 100644 (file)
@@ -116,6 +116,10 @@ lib/vconn-active.man:
 lib/vconn-passive.man:
 lib/vlog.man:
 
+utilities/ovs-dpctl-top.8: \
+       utilities/ovs-dpctl-top.8.in
+utilities/ovs-dpctl-top.8.in:
+
 utilities/ovs-dpctl.8: \
        utilities/ovs-dpctl.8.in \
        lib/common.man \
@@ -124,10 +128,6 @@ utilities/ovs-dpctl.8.in:
 lib/common.man:
 lib/vlog.man:
 
-utilities/ovs-dpctl-top.8: \
-       utilities/ovs-dpctl-top.8.in
-utilities/ovs-dpctl-top.8.in:
-
 utilities/ovs-l3ping.8: \
        utilities/ovs-l3ping.8.in \
        lib/common-syn.man \
index 4a370eb..8a4195d 100644 (file)
@@ -692,10 +692,9 @@ update_in_band_remotes(struct connmgr *mgr)
             continue;
         }
 
-        if (stream_parse_target_with_default_ports(target,
-                                                   OFP_TCP_PORT,
-                                                   OFP_SSL_PORT,
-                                                   sin)) {
+        if (stream_parse_target_with_default_port(target,
+                                                  OFP_OLD_PORT,
+                                                  sin)) {
             n_addrs++;
         }
     }
index 180b87e..9ec081a 100644 (file)
@@ -29,6 +29,8 @@
 #include "list.h"
 #include "netlink.h"
 #include "ofpbuf.h"
+#include "ofproto-dpif-ipfix.h"
+#include "ofproto-dpif-sflow.h"
 #include "ofproto-dpif.h"
 #include "packets.h"
 #include "poll-loop.h"
@@ -38,9 +40,8 @@
 
 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);
 
-COVERAGE_DEFINE(upcall_queue_overflow);
 COVERAGE_DEFINE(drop_queue_overflow);
-COVERAGE_DEFINE(miss_queue_overflow);
+COVERAGE_DEFINE(upcall_queue_overflow);
 COVERAGE_DEFINE(fmb_queue_overflow);
 COVERAGE_DEFINE(fmb_queue_revalidated);
 
@@ -53,11 +54,12 @@ struct handler {
 
     struct ovs_mutex mutex;            /* Mutex guarding the following. */
 
-    /* Atomic queue of unprocessed miss upcalls. */
+    /* Atomic queue of unprocessed upcalls. */
     struct list upcalls OVS_GUARDED;
     size_t n_upcalls OVS_GUARDED;
 
     size_t n_new_upcalls;              /* Only changed by the dispatcher. */
+    bool need_signal;                  /* Only changed by the dispatcher. */
 
     pthread_cond_t wake_cond;          /* Wakes 'thread' while holding
                                           'mutex'. */
@@ -78,12 +80,11 @@ struct udpif {
 
     pthread_t dispatcher;              /* Dispatcher thread ID. */
 
-    struct handler *handlers;          /* Miss handlers. */
+    struct handler *handlers;          /* Upcall handlers. */
     size_t n_handlers;
 
     /* Queues to pass up to ofproto-dpif. */
     struct guarded_list drop_keys; /* "struct drop key"s. */
-    struct guarded_list upcalls;   /* "struct upcall"s. */
     struct guarded_list fmbs;      /* "struct flow_miss_batch"es. */
 
     /* Number of times udpif_revalidate() has been called. */
@@ -94,13 +95,33 @@ struct udpif {
     struct latch exit_latch; /* Tells child threads to exit. */
 };
 
+enum upcall_type {
+    BAD_UPCALL,                 /* Some kind of bug somewhere. */
+    MISS_UPCALL,                /* A flow miss.  */
+    SFLOW_UPCALL,               /* sFlow sample. */
+    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
+    IPFIX_UPCALL                /* Per-bridge sampling. */
+};
+
+struct upcall {
+    struct list list_node;          /* For queuing upcalls. */
+    struct flow_miss *flow_miss;    /* This upcall's flow_miss. */
+
+    /* Raw upcall plus data for keeping track of the memory backing it. */
+    struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */
+    struct ofpbuf upcall_buf;       /* Owns some data in 'dpif_upcall'. */
+    uint64_t upcall_stub[512 / 8];  /* Buffer to reduce need for malloc(). */
+};
+
+static void upcall_destroy(struct upcall *);
+
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
 static void recv_upcalls(struct udpif *);
-static void handle_miss_upcalls(struct udpif *, struct list *upcalls);
+static void handle_upcalls(struct udpif *, struct list *upcalls);
 static void miss_destroy(struct flow_miss *);
 static void *udpif_dispatcher(void *);
-static void *udpif_miss_handler(void *);
+static void *udpif_upcall_handler(void *);
 
 struct udpif *
 udpif_create(struct dpif_backer *backer, struct dpif *dpif)
@@ -113,7 +134,6 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif)
     udpif->wait_seq = seq_create();
     latch_init(&udpif->exit_latch);
     guarded_list_init(&udpif->drop_keys);
-    guarded_list_init(&udpif->upcalls);
     guarded_list_init(&udpif->fmbs);
     atomic_init(&udpif->reval_seq, 0);
 
@@ -125,7 +145,6 @@ udpif_destroy(struct udpif *udpif)
 {
     struct flow_miss_batch *fmb;
     struct drop_key *drop_key;
-    struct upcall *upcall;
 
     udpif_recv_set(udpif, 0, false);
 
@@ -133,16 +152,11 @@ udpif_destroy(struct udpif *udpif)
         drop_key_destroy(drop_key);
     }
 
-    while ((upcall = upcall_next(udpif))) {
-        upcall_destroy(upcall);
-    }
-
     while ((fmb = flow_miss_batch_next(udpif))) {
         flow_miss_batch_destroy(fmb);
     }
 
     guarded_list_destroy(&udpif->drop_keys);
-    guarded_list_destroy(&udpif->upcalls);
     guarded_list_destroy(&udpif->fmbs);
     latch_destroy(&udpif->exit_latch);
     seq_destroy(udpif->wait_seq);
@@ -150,8 +164,9 @@ udpif_destroy(struct udpif *udpif)
 }
 
 /* Tells 'udpif' to begin or stop handling flow misses depending on the value
- * of 'enable'.  'n_handlers' is the number of miss_handler threads to create.
- * Passing 'n_handlers' as zero is equivalent to passing 'enable' as false. */
+ * of 'enable'.  'n_handlers' is the number of upcall_handler threads to
+ * create.  Passing 'n_handlers' as zero is equivalent to passing 'enable' as
+ * false. */
 void
 udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable)
 {
@@ -208,9 +223,11 @@ udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable)
 
             handler->udpif = udpif;
             list_init(&handler->upcalls);
+            handler->need_signal = false;
             xpthread_cond_init(&handler->wake_cond, NULL);
             ovs_mutex_init(&handler->mutex);
-            xpthread_create(&handler->thread, NULL, udpif_miss_handler, handler);
+            xpthread_create(&handler->thread, NULL, udpif_upcall_handler,
+                            handler);
         }
         xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif);
     }
@@ -221,7 +238,6 @@ udpif_wait(struct udpif *udpif)
 {
     uint64_t seq = seq_read(udpif->wait_seq);
     if (!guarded_list_is_empty(&udpif->drop_keys) ||
-        !guarded_list_is_empty(&udpif->upcalls) ||
         !guarded_list_is_empty(&udpif->fmbs)) {
         poll_immediate_wake();
     } else {
@@ -254,18 +270,8 @@ udpif_revalidate(struct udpif *udpif)
     udpif_drop_key_clear(udpif);
 }
 
-/* Retrieves the next upcall which ofproto-dpif is responsible for handling.
- * The caller is responsible for destroying the returned upcall with
- * upcall_destroy(). */
-struct upcall *
-upcall_next(struct udpif *udpif)
-{
-    struct list *next = guarded_list_pop_front(&udpif->upcalls);
-    return next ? CONTAINER_OF(next, struct upcall, list_node) : NULL;
-}
-
 /* Destroys and deallocates 'upcall'. */
-void
+static void
 upcall_destroy(struct upcall *upcall)
 {
     if (upcall) {
@@ -362,9 +368,8 @@ udpif_drop_key_clear(struct udpif *udpif)
     }
 }
 \f
-/* The dispatcher thread is responsible for receving upcalls from the kernel,
- * assigning the miss upcalls to a miss_handler thread, and assigning the more
- * complex ones to ofproto-dpif directly. */
+/* The dispatcher thread is responsible for receiving upcalls from the kernel
+ * and assigning them to an upcall_handler thread. */
 static void *
 udpif_dispatcher(void *arg)
 {
@@ -385,11 +390,11 @@ udpif_dispatcher(void *arg)
  * by the dispatcher thread.  Once finished it passes the processed miss
  * upcalls to ofproto-dpif where they're installed in the datapath. */
 static void *
-udpif_miss_handler(void *arg)
+udpif_upcall_handler(void *arg)
 {
     struct handler *handler = arg;
 
-    set_subprogram_name("miss_handler");
+    set_subprogram_name("upcall_handler");
     for (;;) {
         struct list misses = LIST_INITIALIZER(&misses);
         size_t i;
@@ -415,7 +420,7 @@ udpif_miss_handler(void *arg)
         }
         ovs_mutex_unlock(&handler->mutex);
 
-        handle_miss_upcalls(handler->udpif, &misses);
+        handle_upcalls(handler->udpif, &misses);
     }
 }
 \f
@@ -483,13 +488,14 @@ classify_upcall(const struct upcall *upcall)
 static void
 recv_upcalls(struct udpif *udpif)
 {
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
-    size_t n_udpif_new_upcalls = 0;
-    struct handler *handler;
     int n;
 
     for (;;) {
+        uint32_t hash = udpif->secret;
+        struct handler *handler;
         struct upcall *upcall;
+        size_t n_bytes, left;
+        struct nlattr *nla;
         int error;
 
         upcall = xmalloc(sizeof *upcall);
@@ -502,85 +508,65 @@ recv_upcalls(struct udpif *udpif)
             break;
         }
 
-        upcall->type = classify_upcall(upcall);
-        if (upcall->type == BAD_UPCALL) {
-            upcall_destroy(upcall);
-        } else if (upcall->type == MISS_UPCALL) {
-            struct dpif_upcall *dupcall = &upcall->dpif_upcall;
-            uint32_t hash = udpif->secret;
-            struct nlattr *nla;
-            size_t n_bytes, left;
-
-            n_bytes = 0;
-            NL_ATTR_FOR_EACH (nla, left, dupcall->key, dupcall->key_len) {
-                enum ovs_key_attr type = nl_attr_type(nla);
-                if (type == OVS_KEY_ATTR_IN_PORT
-                    || type == OVS_KEY_ATTR_TCP
-                    || type == OVS_KEY_ATTR_UDP) {
-                    if (nl_attr_get_size(nla) == 4) {
-                        ovs_be32 attr = nl_attr_get_be32(nla);
-                        hash = mhash_add(hash, (OVS_FORCE uint32_t) attr);
-                        n_bytes += 4;
-                    } else {
-                        VLOG_WARN("Netlink attribute with incorrect size.");
-                    }
+        n_bytes = 0;
+        NL_ATTR_FOR_EACH (nla, left, upcall->dpif_upcall.key,
+                          upcall->dpif_upcall.key_len) {
+            enum ovs_key_attr type = nl_attr_type(nla);
+            if (type == OVS_KEY_ATTR_IN_PORT
+                || type == OVS_KEY_ATTR_TCP
+                || type == OVS_KEY_ATTR_UDP) {
+                if (nl_attr_get_size(nla) == 4) {
+                    hash = mhash_add(hash, nl_attr_get_u32(nla));
+                    n_bytes += 4;
+                } else {
+                    VLOG_WARN_RL(&rl,
+                                 "Netlink attribute with incorrect size.");
                 }
             }
-            hash =  mhash_finish(hash, n_bytes);
+        }
+        hash = mhash_finish(hash, n_bytes);
 
-            handler = &udpif->handlers[hash % udpif->n_handlers];
+        handler = &udpif->handlers[hash % udpif->n_handlers];
 
-            ovs_mutex_lock(&handler->mutex);
-            if (handler->n_upcalls < MAX_QUEUE_LENGTH) {
-                list_push_back(&handler->upcalls, &upcall->list_node);
-                handler->n_new_upcalls = ++handler->n_upcalls;
-
-                if (handler->n_new_upcalls >= FLOW_MISS_MAX_BATCH) {
-                    xpthread_cond_signal(&handler->wake_cond);
-                }
-                ovs_mutex_unlock(&handler->mutex);
-                if (!VLOG_DROP_DBG(&rl)) {
-                    struct ds ds = DS_EMPTY_INITIALIZER;
-
-                    odp_flow_key_format(upcall->dpif_upcall.key,
-                                        upcall->dpif_upcall.key_len,
-                                        &ds);
-                    VLOG_DBG("dispatcher: miss enqueue (%s)", ds_cstr(&ds));
-                    ds_destroy(&ds);
-                }
-            } else {
-                ovs_mutex_unlock(&handler->mutex);
-                COVERAGE_INC(miss_queue_overflow);
-                upcall_destroy(upcall);
+        ovs_mutex_lock(&handler->mutex);
+        if (handler->n_upcalls < MAX_QUEUE_LENGTH) {
+            list_push_back(&handler->upcalls, &upcall->list_node);
+            if (handler->n_upcalls == 0) {
+                handler->need_signal = true;
             }
-        } else {
-            size_t len;
-
-            len = guarded_list_push_back(&udpif->upcalls, &upcall->list_node,
-                                         MAX_QUEUE_LENGTH);
-            if (len > 0) {
-                n_udpif_new_upcalls = len;
-                if (n_udpif_new_upcalls >= FLOW_MISS_MAX_BATCH) {
-                    seq_change(udpif->wait_seq);
-                }
-            } else {
-                COVERAGE_INC(upcall_queue_overflow);
-                upcall_destroy(upcall);
+            handler->n_upcalls++;
+            if (handler->need_signal &&
+                handler->n_upcalls >= FLOW_MISS_MAX_BATCH) {
+                handler->need_signal = false;
+                xpthread_cond_signal(&handler->wake_cond);
             }
+            ovs_mutex_unlock(&handler->mutex);
+            if (!VLOG_DROP_DBG(&rl)) {
+                struct ds ds = DS_EMPTY_INITIALIZER;
+
+                odp_flow_key_format(upcall->dpif_upcall.key,
+                                    upcall->dpif_upcall.key_len,
+                                    &ds);
+                VLOG_DBG("dispatcher: enqueue (%s)", ds_cstr(&ds));
+                ds_destroy(&ds);
+            }
+        } else {
+            ovs_mutex_unlock(&handler->mutex);
+            COVERAGE_INC(upcall_queue_overflow);
+            upcall_destroy(upcall);
         }
     }
+
     for (n = 0; n < udpif->n_handlers; ++n) {
-        handler = &udpif->handlers[n];
-        if (handler->n_new_upcalls) {
-            handler->n_new_upcalls = 0;
+        struct handler *handler = &udpif->handlers[n];
+
+        if (handler->need_signal) {
+            handler->need_signal = false;
             ovs_mutex_lock(&handler->mutex);
             xpthread_cond_signal(&handler->wake_cond);
             ovs_mutex_unlock(&handler->mutex);
         }
     }
-    if (n_udpif_new_upcalls) {
-        seq_change(udpif->wait_seq);
-    }
 }
 
 static struct flow_miss *
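
The handler selection above amounts to hashing a few fixed-size flow key
attributes together with the per-udpif secret and taking the result modulo the
number of handler threads, so every packet of a given flow reaches the same
thread.  A stand-alone sketch of the idea; the 32-bit mixer below is an
illustrative stand-in for OVS's mhash_add()/mhash_finish(), not the real
functions.

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative 32-bit mixing step (stand-in for mhash_add()). */
    static uint32_t
    mix32(uint32_t hash, uint32_t value)
    {
        hash ^= value * UINT32_C(0x9e3779b1);
        hash = (hash << 13) | (hash >> 19);
        return hash * 5 + UINT32_C(0xe6546b64);
    }

    /* Picks the handler thread for a flow whose hashed key attributes
     * (e.g. in_port and TCP/UDP ports) are 'attrs[0..n_attrs-1]'. */
    static size_t
    choose_handler(uint32_t secret, const uint32_t attrs[], size_t n_attrs,
                   size_t n_handlers)
    {
        uint32_t hash = secret;
        size_t i;

        for (i = 0; i < n_attrs; i++) {
            hash = mix32(hash, attrs[i]);
        }
        return hash % n_handlers;
    }
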
@@ -599,7 +585,7 @@ flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto,
 }
 
 static void
-handle_miss_upcalls(struct udpif *udpif, struct list *upcalls)
+handle_upcalls(struct udpif *udpif, struct list *upcalls)
 {
     struct dpif_op *opsp[FLOW_MISS_MAX_BATCH];
     struct dpif_op ops[FLOW_MISS_MAX_BATCH];
@@ -608,6 +594,7 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls)
     size_t n_misses, n_ops, i;
     struct flow_miss *miss;
     unsigned int reval_seq;
+    enum upcall_type type;
     bool fail_open;
 
     /* Extract the flow from each upcall.  Construct in fmb->misses a hash
@@ -640,6 +627,8 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls)
         struct flow_miss *miss = &fmb->miss_buf[n_misses];
         struct flow_miss *existing_miss;
         struct ofproto_dpif *ofproto;
+        struct dpif_sflow *sflow;
+        struct dpif_ipfix *ipfix;
         odp_port_t odp_in_port;
         struct flow flow;
         int error;
@@ -647,8 +636,39 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls)
         error = xlate_receive(udpif->backer, packet, dupcall->key,
                               dupcall->key_len, &flow, &miss->key_fitness,
                               &ofproto, &odp_in_port);
+        if (error) {
+            if (error == ENODEV) {
+                struct drop_key *drop_key;
+
+                /* Received packet on datapath port for which we couldn't
+                 * associate an ofproto.  This can happen if a port is removed
+                 * while traffic is being received.  Print a rate-limited
+                 * message in case it happens frequently.  Install a drop flow
+                 * so that future packets of the flow are inexpensively dropped
+                 * in the kernel. */
+                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
+                             "port %"PRIu32, odp_in_port);
+
+                drop_key = xmalloc(sizeof *drop_key);
+                drop_key->key = xmemdup(dupcall->key, dupcall->key_len);
+                drop_key->key_len = dupcall->key_len;
+
+                if (guarded_list_push_back(&udpif->drop_keys,
+                                           &drop_key->list_node,
+                                           MAX_QUEUE_LENGTH)) {
+                    seq_change(udpif->wait_seq);
+                } else {
+                    COVERAGE_INC(drop_queue_overflow);
+                    drop_key_destroy(drop_key);
+                }
+            }
+            list_remove(&upcall->list_node);
+            upcall_destroy(upcall);
+            continue;
+        }
 
-        if (!error) {
+        type = classify_upcall(upcall);
+        if (type == MISS_UPCALL) {
             uint32_t hash;
 
             flow_extract(packet, flow.skb_priority, flow.pkt_mark,
@@ -677,35 +697,57 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls)
             miss->stats.n_packets++;
 
             upcall->flow_miss = miss;
-        } else {
-            if (error == ENODEV) {
-                struct drop_key *drop_key;
-
-                /* Received packet on datapath port for which we couldn't
-                 * associate an ofproto.  This can happen if a port is removed
-                 * while traffic is being received.  Print a rate-limited
-                 * message in case it happens frequently.  Install a drop flow
-                 * so that future packets of the flow are inexpensively dropped
-                 * in the kernel. */
-                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
-                             "port %"PRIu32, odp_in_port);
-
-                drop_key = xmalloc(sizeof *drop_key);
-                drop_key->key = xmemdup(dupcall->key, dupcall->key_len);
-                drop_key->key_len = dupcall->key_len;
+            continue;
+        }
 
-                if (guarded_list_push_back(&udpif->drop_keys,
-                                           &drop_key->list_node,
-                                           MAX_QUEUE_LENGTH)) {
-                    seq_change(udpif->wait_seq);
-                } else {
-                    COVERAGE_INC(drop_queue_overflow);
-                    drop_key_destroy(drop_key);
-                }
+        switch (type) {
+        case SFLOW_UPCALL:
+            sflow = xlate_get_sflow(ofproto);
+            if (sflow) {
+                union user_action_cookie cookie;
+
+                memset(&cookie, 0, sizeof cookie);
+                memcpy(&cookie, nl_attr_get(dupcall->userdata),
+                       sizeof cookie.sflow);
+                dpif_sflow_received(sflow, dupcall->packet, &flow, odp_in_port,
+                                    &cookie);
+                dpif_sflow_unref(sflow);
             }
-            list_remove(&upcall->list_node);
-            upcall_destroy(upcall);
+            break;
+        case IPFIX_UPCALL:
+            ipfix = xlate_get_ipfix(ofproto);
+            if (ipfix) {
+                dpif_ipfix_bridge_sample(ipfix, dupcall->packet, &flow);
+                dpif_ipfix_unref(ipfix);
+            }
+            break;
+        case FLOW_SAMPLE_UPCALL:
+            ipfix = xlate_get_ipfix(ofproto);
+            if (ipfix) {
+                union user_action_cookie cookie;
+
+                memset(&cookie, 0, sizeof cookie);
+                memcpy(&cookie, nl_attr_get(dupcall->userdata),
+                       sizeof cookie.flow_sample);
+
+                /* The flow reflects exactly the contents of the packet.
+                 * Sample the packet using it. */
+                dpif_ipfix_flow_sample(ipfix, dupcall->packet, &flow,
+                                       cookie.flow_sample.collector_set_id,
+                                       cookie.flow_sample.probability,
+                                       cookie.flow_sample.obs_domain_id,
+                                       cookie.flow_sample.obs_point_id);
+                dpif_ipfix_unref(ipfix);
+            }
+            break;
+        case BAD_UPCALL:
+            break;
+        case MISS_UPCALL:
+            NOT_REACHED();
         }
+
+        list_remove(&upcall->list_node);
+        upcall_destroy(upcall);
     }
 
     /* Initialize each 'struct flow_miss's ->xout.
index cd97e79..da75719 100644 (file)
@@ -40,38 +40,6 @@ void udpif_wait(struct udpif *);
 
 void udpif_revalidate(struct udpif *);
 \f
-/* udpif can handle some upcalls on its own.  Others need the main ofproto_dpif
- * code to handle them.  This interface passes upcalls not handled by udpif up
- * to the ofproto_dpif main thread. */
-
-/* Type of an upcall. */
-enum upcall_type {
-    /* Handled internally by udpif code.  Not returned by upcall_next().*/
-    BAD_UPCALL,                 /* Some kind of bug somewhere. */
-    MISS_UPCALL,                /* A flow miss.  */
-
-    /* Require main thread's involvement.  May be returned by upcall_next(). */
-    SFLOW_UPCALL,               /* sFlow sample. */
-    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
-    IPFIX_UPCALL                /* Per-bridge sampling. */
-};
-
-/* An upcall. */
-struct upcall {
-    struct list list_node;          /* For queuing upcalls. */
-    struct flow_miss *flow_miss;    /* This upcall's flow_miss. */
-
-    enum upcall_type type;          /* Classification. */
-
-    /* Raw upcall plus data for keeping track of the memory backing it. */
-    struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */
-    struct ofpbuf upcall_buf;       /* Owns some data in 'dpif_upcall'. */
-    uint64_t upcall_stub[512 / 8];  /* Buffer to reduce need for malloc(). */
-};
-
-struct upcall *upcall_next(struct udpif *);
-void upcall_destroy(struct upcall *);
-\f
 /* udpif figures out how to forward packets, and does forward them, but it
  * can't set up datapath flows on its own.  This interface passes packet
  * forwarding data from udpif to the higher level ofproto_dpif to allow the
index a5b6814..930abc3 100644 (file)
@@ -56,6 +56,10 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
  * flow translation. */
 #define MAX_RESUBMIT_RECURSION 64
 
+/* Maximum number of resubmit actions in a flow translation, whether they are
+ * recursive or not. */
+#define MAX_RESUBMITS (MAX_RESUBMIT_RECURSION * MAX_RESUBMIT_RECURSION)
+
 struct ovs_rwlock xlate_rwlock = OVS_RWLOCK_INITIALIZER;
 
 struct xbridge {
@@ -158,7 +162,17 @@ struct xlate_ctx {
     /* The rule that we are currently translating, or NULL. */
     struct rule_dpif *rule;
 
-    int recurse;                /* Recursion level, via xlate_table_action. */
+    int mpls_depth_delta;       /* Delta of the mpls stack depth since
+                                 * actions were last committed.
+                                 * Must be between -1 and 1 inclusive. */
+    ovs_be32 pre_push_mpls_lse; /* Used to record the top-most MPLS LSE
+                                 * prior to an mpls_push so that it may be
+                                 * used for a subsequent mpls_pop. */
+
+    /* Resubmit statistics, via xlate_table_action(). */
+    int recurse;                /* Current resubmit nesting depth. */
+    int resubmits;              /* Total number of resubmits. */
+
     uint32_t orig_skb_priority; /* Priority when packet arrived. */
     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
     uint32_t sflow_n_outputs;   /* Number of output ports. */
@@ -1534,7 +1548,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
 
     /* If 'struct flow' gets additional metadata, we'll need to zero it out
      * before traversing a patch port. */
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
 
     if (!xport) {
         xlate_report(ctx, "Nonexistent output port");
@@ -1645,7 +1659,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
 
     if (out_port != ODPP_NONE) {
         commit_odp_actions(flow, &ctx->base_flow,
-                           &ctx->xout->odp_actions, &ctx->xout->wc);
+                           &ctx->xout->odp_actions, &ctx->xout->wc,
+                           &ctx->mpls_depth_delta);
         nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
                             out_port);
 
@@ -1677,6 +1692,7 @@ xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule)
         rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
     }
 
+    ctx->resubmits++;
     ctx->recurse++;
     ctx->rule = rule;
     actions = rule_dpif_get_actions(rule);
@@ -1690,7 +1706,18 @@ static void
 xlate_table_action(struct xlate_ctx *ctx,
                    ofp_port_t in_port, uint8_t table_id, bool may_packet_in)
 {
-    if (ctx->recurse < MAX_RESUBMIT_RECURSION) {
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+    if (ctx->recurse >= MAX_RESUBMIT_RECURSION) {
+        VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times",
+                    MAX_RESUBMIT_RECURSION);
+    } else if (ctx->resubmits >= MAX_RESUBMITS) {
+        VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS);
+    } else if (ctx->xout->odp_actions.size > UINT16_MAX) {
+        VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions");
+    } else if (ctx->stack.size >= 65536) {
+        VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of stack");
+    } else {
         struct rule_dpif *rule;
         ofp_port_t old_in_port = ctx->xin->flow.in_port.ofp_port;
         uint8_t old_table_id = ctx->table_id;
@@ -1730,12 +1757,10 @@ xlate_table_action(struct xlate_ctx *ctx,
         }
 
         ctx->table_id = old_table_id;
-    } else {
-        static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1);
-
-        VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times",
-                    MAX_RESUBMIT_RECURSION);
+        return;
     }
+
+    ctx->exit = true;
 }
 
 static void
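
The reworked xlate_table_action() above replaces the single recursion check
with a series of guard clauses and sets ctx->exit once any limit trips.  A
compressed sketch of the depth/total bookkeeping, using the patch's numeric
limits but hypothetical structure and function names:

    #include <stdbool.h>

    enum {
        MAX_DEPTH = 64,                    /* Mirrors MAX_RESUBMIT_RECURSION. */
        MAX_TOTAL = MAX_DEPTH * MAX_DEPTH  /* Mirrors MAX_RESUBMITS (4096). */
    };

    struct resubmit_limits {
        int recurse;    /* Current resubmit nesting depth. */
        int resubmits;  /* Total resubmits so far, nested or not. */
    };

    /* Returns true if one more resubmit is allowed; the real code also bounds
     * the generated datapath actions and the stack to 64 kB each. */
    static bool
    may_resubmit(const struct resubmit_limits *l)
    {
        return l->recurse < MAX_DEPTH && l->resubmits < MAX_TOTAL;
    }
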
@@ -1800,7 +1825,8 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
     memset(&key.tunnel, 0, sizeof key.tunnel);
 
     commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
-                       &ctx->xout->odp_actions, &ctx->xout->wc);
+                       &ctx->xout->odp_actions, &ctx->xout->wc,
+                       &ctx->mpls_depth_delta);
 
     odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data,
                         ctx->xout->odp_actions.size, NULL, NULL);
@@ -1820,7 +1846,7 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
     ofpbuf_delete(packet);
 }
 
-static void
+static bool
 compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 {
     struct flow_wildcards *wc = &ctx->xout->wc;
@@ -1828,12 +1854,35 @@ compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 
     ovs_assert(eth_type_mpls(eth_type));
 
+    /* If mpls_depth_delta is negative then an MPLS POP action has been
+     * composed and the resulting MPLS label stack is unknown.  This means
+     * an MPLS PUSH action can't be composed as it needs to know either the
+     * top-most MPLS LSE to use as a template for the new MPLS LSE, or that
+     * there is no MPLS label stack present.  Thus, stop processing.
+     *
+     * If mpls_depth_delta is positive then an MPLS PUSH action has been
+     * composed and no further MPLS PUSH action may be performed without
+     * losing MPLS LSE and ether type information held in ctx->xin->flow.
+     * Thus, stop processing.
+     *
+     * If the MPLS LSE of the flow and base_flow differ then the MPLS LSE
+     * has been updated.  Performing an MPLS PUSH action would result in
+     * losing MPLS LSE and ether type information held in ctx->xin->flow.
+     * Thus, stop processing.
+     *
+     * It is planned that in the future this case will be handled
+     * by recirculation */
+    if (ctx->mpls_depth_delta ||
+        ctx->xin->flow.mpls_lse != ctx->base_flow.mpls_lse) {
+        return true;
+    }
+
     memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
-    memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth);
 
-    if (flow->mpls_depth) {
+    ctx->pre_push_mpls_lse = ctx->xin->flow.mpls_lse;
+
+    if (eth_type_mpls(ctx->xin->flow.dl_type)) {
         flow->mpls_lse &= ~htonl(MPLS_BOS_MASK);
-        flow->mpls_depth++;
     } else {
         ovs_be32 label;
         uint8_t tc, ttl;
@@ -1848,30 +1897,48 @@ compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
         tc = (flow->nw_tos & IP_DSCP_MASK) >> 2;
         ttl = flow->nw_ttl ? flow->nw_ttl : 0x40;
         flow->mpls_lse = set_mpls_lse_values(ttl, tc, 1, label);
-        flow->mpls_depth = 1;
     }
     flow->dl_type = eth_type;
+    ctx->mpls_depth_delta++;
+
+    return false;
 }
 
-static void
+static bool
 compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 {
     struct flow_wildcards *wc = &ctx->xout->wc;
-    struct flow *flow = &ctx->xin->flow;
 
-    ovs_assert(eth_type_mpls(ctx->xin->flow.dl_type));
-    ovs_assert(!eth_type_mpls(eth_type));
+    if (!eth_type_mpls(ctx->xin->flow.dl_type)) {
+        return true;
+    }
+
+    /* If mpls_depth_delta is negative then an MPLS POP action has been
+     * composed.  Performing another MPLS POP action would result in losing
+     * the ether type that resulted from the already composed MPLS POP.
+     * Thus, stop processing.
+     *
+     * It is planned that in the future this case will be handled
+     * by recirculation */
+    if (ctx->mpls_depth_delta < 0) {
+        return true;
+    }
 
     memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
-    memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth);
 
-    if (flow->mpls_depth) {
-        flow->mpls_depth--;
-        flow->mpls_lse = htonl(0);
-        if (!flow->mpls_depth) {
-            flow->dl_type = eth_type;
-        }
+    /* If mpls_depth_delta is positive then an MPLS PUSH action has been
+     * executed and the previous MPLS LSE saved in ctx->pre_push_mpls_lse.  The
+     * flow's MPLS LSE should be restored to that value so that any subsequent
+     * actions that update the LSE are executed correctly.
+     */
+    if (ctx->mpls_depth_delta > 0) {
+        ctx->xin->flow.mpls_lse = ctx->pre_push_mpls_lse;
     }
+
+    ctx->xin->flow.dl_type = eth_type;
+    ctx->mpls_depth_delta--;
+
+    return false;
 }
 
 static bool
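
Until recirculation is available, the translation allows at most one
uncommitted change to the MPLS stack, so mpls_depth_delta only ever holds -1,
0, or +1.  A hedged sketch of that bookkeeping with hypothetical names (the
real logic also compares the flow's and base flow's MPLS LSE before a push):

    #include <stdbool.h>

    struct mpls_state {
        int depth_delta;            /* -1, 0, or +1. */
    };

    /* Returns true, meaning "stop translating", if a push cannot be composed
     * because another MPLS stack change is already pending. */
    static bool
    try_push(struct mpls_state *s)
    {
        if (s->depth_delta != 0) {
            return true;
        }
        s->depth_delta++;           /* +1 until the actions are committed. */
        return false;
    }

    /* Returns true if a pop cannot be composed on top of a pending pop. */
    static bool
    try_pop(struct mpls_state *s)
    {
        if (s->depth_delta < 0) {
            return true;
        }
        s->depth_delta--;           /* +1 -> 0 (push undone) or 0 -> -1. */
        return false;
    }

    /* Committing the pending actions resets the delta to zero. */
    static void
    commit_pending(struct mpls_state *s)
    {
        s->depth_delta = 0;
    }
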
@@ -1907,6 +1974,18 @@ compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
         return true;
     }
 
+    /* If mpls_depth_delta is negative then an MPLS POP action has been
+     * executed and the resulting MPLS label stack is unknown.  This means
+     * a SET MPLS TTL action can't be executed as it needs to manipulate
+     * the top-most MPLS LSE. Thus, stop processing.
+     *
+     * It is planned that in the future this case will be handled
+     * by recirculation.
+     */
+    if (ctx->mpls_depth_delta < 0) {
+        return true;
+    }
+
     ctx->xout->wc.masks.mpls_lse |= htonl(MPLS_TTL_MASK);
     set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse, ttl);
     return false;
@@ -2134,7 +2213,8 @@ xlate_sample_action(struct xlate_ctx *ctx,
   uint32_t probability = (os->probability << 16) | os->probability;
 
   commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
-                     &ctx->xout->odp_actions, &ctx->xout->wc);
+                     &ctx->xout->odp_actions, &ctx->xout->wc,
+                     &ctx->mpls_depth_delta);
 
   compose_flow_sample_cookie(os->probability, os->collector_set_id,
                              os->obs_domain_id, os->obs_point_id, &cookie);
@@ -2309,11 +2389,17 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_PUSH_MPLS:
-            compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a)->ethertype);
+            if (compose_mpls_push_action(ctx,
+                                         ofpact_get_PUSH_MPLS(a)->ethertype)) {
+                return;
+            }
             break;
 
         case OFPACT_POP_MPLS:
-            compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
+            if (compose_mpls_pop_action(ctx,
+                                        ofpact_get_POP_MPLS(a)->ethertype)) {
+                return;
+            }
             break;
 
         case OFPACT_SET_MPLS_TTL:
@@ -2462,6 +2548,44 @@ xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
     ofpbuf_put(&dst->odp_actions, src->odp_actions.data,
                src->odp_actions.size);
 }
+
+/* Returns a reference to the sflow handler associated with 'ofproto', or NULL
+ * if there is none.  The caller is responsible for decrementing the result's
+ * ref count with dpif_sflow_unref(). */
+struct dpif_sflow *
+xlate_get_sflow(const struct ofproto_dpif *ofproto)
+{
+    struct dpif_sflow *sflow = NULL;
+    struct xbridge *xbridge;
+
+    ovs_rwlock_rdlock(&xlate_rwlock);
+    xbridge = xbridge_lookup(ofproto);
+    if (xbridge) {
+        sflow = dpif_sflow_ref(xbridge->sflow);
+    }
+    ovs_rwlock_unlock(&xlate_rwlock);
+
+    return sflow;
+}
+
+/* Returns a reference to the ipfix handler associated with 'ofproto', or NULL
+ * if there is none.  The caller is responsible for decrementing the result's
+ * ref count with dpif_ipfix_unref(). */
+struct dpif_ipfix *
+xlate_get_ipfix(const struct ofproto_dpif *ofproto)
+{
+    struct dpif_ipfix *ipfix = NULL;
+    struct xbridge *xbridge;
+
+    ovs_rwlock_rdlock(&xlate_rwlock);
+    xbridge = xbridge_lookup(ofproto);
+    if (xbridge) {
+        ipfix = dpif_ipfix_ref(xbridge->ipfix);
+    }
+    ovs_rwlock_unlock(&xlate_rwlock);
+
+    return ipfix;
+}
 \f
 static struct skb_priority_to_dscp *
 get_skb_priority(const struct xport *xport, uint32_t skb_priority)
@@ -2595,9 +2719,11 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
     }
 
     ctx.recurse = 0;
+    ctx.resubmits = 0;
     ctx.orig_skb_priority = flow->skb_priority;
     ctx.table_id = 0;
     ctx.exit = false;
+    ctx.mpls_depth_delta = 0;
 
     if (xin->ofpacts) {
         ofpacts = xin->ofpacts;
@@ -2682,6 +2808,15 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         }
     }
 
+    if (nl_attr_oversized(ctx.xout->odp_actions.size)) {
+        /* These datapath actions are too big for a Netlink attribute, so we
+         * can't execute them. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+        VLOG_ERR_RL(&rl, "discarding oversize datapath actions");
+        ofpbuf_clear(&ctx.xout->odp_actions);
+    }
+
     ofpbuf_uninit(&ctx.stack);
 
     /* Clear the metadata and register wildcard masks, because we won't
index a54a9e4..6403f50 100644 (file)
@@ -154,4 +154,10 @@ void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
 void xlate_out_uninit(struct xlate_out *);
 void xlate_actions_for_side_effects(struct xlate_in *);
 void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);
+
+struct dpif_sflow *xlate_get_sflow(const struct ofproto_dpif *)
+    OVS_EXCLUDED(xlate_rwlock);
+struct dpif_ipfix *xlate_get_ipfix(const struct ofproto_dpif *)
+    OVS_EXCLUDED(xlate_rwlock);
+
 #endif /* ofproto-dpif-xlate.h */
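
Both accessors return counted references taken under xlate_rwlock, so callers
outside the xlate module follow a get/use/unref pattern.  A sketch of the
expected call shape, mirroring the upcall-handler code earlier in this patch;
the wrapper name and parameter list are assumptions, and the ofproto-dpif
headers are assumed to be included.

    static void
    sample_with_sflow(const struct ofproto_dpif *ofproto, struct ofpbuf *packet,
                      const struct flow *flow, odp_port_t odp_in_port,
                      const union user_action_cookie *cookie)
    {
        struct dpif_sflow *sflow = xlate_get_sflow(ofproto);

        if (sflow) {
            /* Use the reference... */
            dpif_sflow_received(sflow, packet, flow, odp_in_port, cookie);
            /* ...and always release it, since xlate_get_sflow() took a ref. */
            dpif_sflow_unref(sflow);
        }
    }
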
index 80874b8..80e97e0 100644 (file)
 VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
 
 COVERAGE_DEFINE(ofproto_dpif_expired);
-COVERAGE_DEFINE(facet_changed_rule);
 COVERAGE_DEFINE(facet_revalidate);
 COVERAGE_DEFINE(facet_unexpected);
-COVERAGE_DEFINE(facet_suppress);
+COVERAGE_DEFINE(facet_create);
+COVERAGE_DEFINE(facet_remove);
+COVERAGE_DEFINE(subfacet_create);
+COVERAGE_DEFINE(subfacet_destroy);
 COVERAGE_DEFINE(subfacet_install_fail);
 COVERAGE_DEFINE(packet_in_overflow);
-COVERAGE_DEFINE(flow_mod_overflow);
 
 /* Number of implemented OpenFlow tables. */
 enum { N_TABLES = 255 };
@@ -437,20 +438,6 @@ struct dpif_backer {
     unsigned avg_n_subfacet;         /* Average number of flows. */
     long long int avg_subfacet_life; /* Average life span of subfacets. */
 
-    /* The average number of subfacets... */
-    struct avg_subfacet_rates hourly;   /* ...over the last hour. */
-    struct avg_subfacet_rates daily;    /* ...over the last day. */
-    struct avg_subfacet_rates lifetime; /* ...over the switch lifetime. */
-    long long int last_minute;          /* Last time 'hourly' was updated. */
-
-    /* Number of subfacets added or deleted since 'last_minute'. */
-    unsigned subfacet_add_count;
-    unsigned subfacet_del_count;
-
-    /* Number of subfacets added or deleted from 'created' to 'last_minute.' */
-    unsigned long long int total_subfacet_add_count;
-    unsigned long long int total_subfacet_del_count;
-
     /* Number of upcall handling threads. */
     unsigned int n_handler_threads;
 };
@@ -459,7 +446,6 @@ struct dpif_backer {
 static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
 
 static void drop_key_clear(struct dpif_backer *);
-static void update_moving_averages(struct dpif_backer *backer);
 
 struct ofproto_dpif {
     struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
@@ -1217,14 +1203,6 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
 
     backer->max_n_subfacet = 0;
     backer->created = time_msec();
-    backer->last_minute = backer->created;
-    memset(&backer->hourly, 0, sizeof backer->hourly);
-    memset(&backer->daily, 0, sizeof backer->daily);
-    memset(&backer->lifetime, 0, sizeof backer->lifetime);
-    backer->subfacet_add_count = 0;
-    backer->subfacet_del_count = 0;
-    backer->total_subfacet_add_count = 0;
-    backer->total_subfacet_del_count = 0;
     backer->avg_n_subfacet = 0;
     backer->avg_subfacet_life = 0;
 
@@ -3427,107 +3405,12 @@ handle_flow_misses(struct dpif_backer *backer, struct flow_miss_batch *fmb)
     }
 }
 
-static void
-handle_sflow_upcall(struct dpif_backer *backer,
-                    const struct dpif_upcall *upcall)
-{
-    struct ofproto_dpif *ofproto;
-    union user_action_cookie cookie;
-    struct flow flow;
-    odp_port_t odp_in_port;
-
-    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                      &flow, NULL, &ofproto, &odp_in_port)
-        || !ofproto->sflow) {
-        return;
-    }
-
-    memset(&cookie, 0, sizeof cookie);
-    memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow);
-    dpif_sflow_received(ofproto->sflow, upcall->packet, &flow,
-                        odp_in_port, &cookie);
-}
-
-static void
-handle_flow_sample_upcall(struct dpif_backer *backer,
-                          const struct dpif_upcall *upcall)
-{
-    struct ofproto_dpif *ofproto;
-    union user_action_cookie cookie;
-    struct flow flow;
-
-    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                      &flow, NULL, &ofproto, NULL)
-        || !ofproto->ipfix) {
-        return;
-    }
-
-    memset(&cookie, 0, sizeof cookie);
-    memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample);
-
-    /* The flow reflects exactly the contents of the packet.  Sample
-     * the packet using it. */
-    dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow,
-                           cookie.flow_sample.collector_set_id,
-                           cookie.flow_sample.probability,
-                           cookie.flow_sample.obs_domain_id,
-                           cookie.flow_sample.obs_point_id);
-}
-
-static void
-handle_ipfix_upcall(struct dpif_backer *backer,
-                    const struct dpif_upcall *upcall)
-{
-    struct ofproto_dpif *ofproto;
-    struct flow flow;
-
-    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                      &flow, NULL, &ofproto, NULL)
-        || !ofproto->ipfix) {
-        return;
-    }
-
-    /* The flow reflects exactly the contents of the packet.  Sample
-     * the packet using it. */
-    dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow);
-}
-
 static void
 handle_upcalls(struct dpif_backer *backer)
 {
     struct flow_miss_batch *fmb;
     int n_processed;
 
-    for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) {
-        struct upcall *upcall = upcall_next(backer->udpif);
-
-        if (!upcall) {
-            break;
-        }
-
-        switch (upcall->type) {
-        case SFLOW_UPCALL:
-            handle_sflow_upcall(backer, &upcall->dpif_upcall);
-            break;
-
-        case FLOW_SAMPLE_UPCALL:
-            handle_flow_sample_upcall(backer, &upcall->dpif_upcall);
-            break;
-
-        case IPFIX_UPCALL:
-            handle_ipfix_upcall(backer, &upcall->dpif_upcall);
-            break;
-
-        case BAD_UPCALL:
-            break;
-
-        case MISS_UPCALL:
-            NOT_REACHED();
-        }
-
-        upcall_destroy(upcall);
-    }
-
     for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) {
         struct drop_key *drop_key = drop_key_next(backer->udpif);
         if (!drop_key) {
@@ -3739,8 +3622,6 @@ update_stats(struct dpif_backer *backer)
         run_fast_rl();
     }
     dpif_flow_dump_done(&dump);
-
-    update_moving_averages(backer);
 }
 
 /* Calculates and returns the number of milliseconds of idle time after which
@@ -3923,6 +3804,7 @@ facet_create(const struct flow_miss *miss)
     struct facet *facet;
     struct match match;
 
+    COVERAGE_INC(facet_create);
     facet = xzalloc(sizeof *facet);
     facet->ofproto = miss->ofproto;
     facet->used = miss->stats.used;
@@ -3986,6 +3868,7 @@ facet_remove(struct facet *facet)
 {
     struct subfacet *subfacet, *next_subfacet;
 
+    COVERAGE_INC(facet_remove);
     ovs_assert(!list_is_empty(&facet->subfacets));
 
     /* First uninstall all of the subfacets to get final statistics. */
@@ -4523,6 +4406,7 @@ subfacet_create(struct facet *facet, struct flow_miss *miss)
         subfacet = xmalloc(sizeof *subfacet);
     }
 
+    COVERAGE_INC(subfacet_create);
     hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash);
     list_push_back(&facet->subfacets, &subfacet->list_node);
     subfacet->facet = facet;
@@ -4535,7 +4419,6 @@ subfacet_create(struct facet *facet, struct flow_miss *miss)
     subfacet->path = SF_NOT_INSTALLED;
     subfacet->backer = backer;
 
-    backer->subfacet_add_count++;
     return subfacet;
 }
 
@@ -4545,11 +4428,8 @@ static void
 subfacet_destroy__(struct subfacet *subfacet)
 {
     struct facet *facet = subfacet->facet;
-    struct ofproto_dpif *ofproto = facet->ofproto;
-
-    /* Update ofproto stats before uninstall the subfacet. */
-    ofproto->backer->subfacet_del_count++;
 
+    COVERAGE_INC(subfacet_destroy);
     subfacet_uninstall(subfacet);
     hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node);
     list_remove(&subfacet->list_node);
@@ -5340,12 +5220,13 @@ static void
 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
                       void *aux OVS_UNUSED)
 {
-    const struct dpif_backer *backer;
+    const struct dpif_backer *backer = NULL;
     struct ofproto_dpif *ofproto;
     struct ofpbuf odp_key, odp_mask;
     struct ofpbuf *packet;
     struct ds result;
     struct flow flow;
+    struct simap port_names;
     char *s;
 
     packet = NULL;
@@ -5353,6 +5234,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
     ds_init(&result);
     ofpbuf_init(&odp_key, 0);
     ofpbuf_init(&odp_mask, 0);
+    simap_init(&port_names);
 
     /* Handle "-generate" or a hex string as the last argument. */
     if (!strcmp(argv[argc - 1], "-generate")) {
@@ -5369,37 +5251,42 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
         }
     }
 
+    /* An odp_flow may specify its in_port as a name instead of a port number.
+     * We do not yet know whether a given flow is an odp_flow or a br_flow, but
+     * to check for an odp_flow via odp_flow_from_string() we first need a
+     * simap from port names to port numbers. */
+    if (argc == 3) {
+        const char *dp_type;
+        if (!strncmp(argv[1], "ovs-", 4)) {
+            dp_type = argv[1] + 4;
+        } else {
+            dp_type = argv[1];
+        }
+        backer = shash_find_data(&all_dpif_backers, dp_type);
+    } else {
+        struct shash_node *node;
+        if (shash_count(&all_dpif_backers) == 1) {
+            node = shash_first(&all_dpif_backers);
+            backer = node->data;
+        }
+    }
+    if (backer && backer->dpif) {
+        struct dpif_port dpif_port;
+        struct dpif_port_dump port_dump;
+        DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
+            simap_put(&port_names, dpif_port.name,
+                      odp_to_u32(dpif_port.port_no));
+        }
+    }
+
     /* Parse the flow and determine whether a datapath or
      * bridge is specified. If function odp_flow_key_from_string()
      * returns 0, the flow is a odp_flow. If function
      * parse_ofp_exact_flow() returns 0, the flow is a br_flow. */
-    if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, &odp_mask)) {
-        /* If the odp_flow is the second argument,
-         * the datapath name is the first argument. */
-        if (argc == 3) {
-            const char *dp_type;
-            if (!strncmp(argv[1], "ovs-", 4)) {
-                dp_type = argv[1] + 4;
-            } else {
-                dp_type = argv[1];
-            }
-            backer = shash_find_data(&all_dpif_backers, dp_type);
-            if (!backer) {
-                unixctl_command_reply_error(conn, "Cannot find datapath "
-                               "of this name");
-                goto exit;
-            }
-        } else {
-            /* No datapath name specified, so there should be only one
-             * datapath. */
-            struct shash_node *node;
-            if (shash_count(&all_dpif_backers) != 1) {
-                unixctl_command_reply_error(conn, "Must specify datapath "
-                         "name, there is more than one type of datapath");
-                goto exit;
-            }
-            node = shash_first(&all_dpif_backers);
-            backer = node->data;
+    if (!odp_flow_from_string(argv[argc - 1], &port_names, &odp_key, &odp_mask)) {
+        if (!backer) {
+            unixctl_command_reply_error(conn, "Cannot find the datapath");
+            goto exit;
         }
 
         if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, &flow,
@@ -5452,6 +5339,7 @@ exit:
     ofpbuf_delete(packet);
     ofpbuf_uninit(&odp_key);
     ofpbuf_uninit(&odp_mask);
+    simap_destroy(&port_names);
 }
 
 static void
@@ -5636,14 +5524,6 @@ ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
     ds_destroy(&ds);
 }
 
-static void
-show_dp_rates(struct ds *ds, const char *heading,
-              const struct avg_subfacet_rates *rates)
-{
-    ds_put_format(ds, "%s add rate: %5.3f/min, del rate: %5.3f/min\n",
-                  heading, rates->add_rate, rates->del_rate);
-}
-
 static void
 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
 {
@@ -5651,7 +5531,6 @@ dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
     struct ofproto_dpif *ofproto;
     struct shash ofproto_shash;
     uint64_t n_hit, n_missed;
-    long long int minutes;
     size_t i;
 
     n_hit = n_missed = 0;
@@ -5669,15 +5548,6 @@ dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
                   backer->avg_n_subfacet, backer->max_n_subfacet,
                   backer->avg_subfacet_life);
 
-    minutes = (time_msec() - backer->created) / (1000 * 60);
-    if (minutes >= 60) {
-        show_dp_rates(ds, "\thourly avg:", &backer->hourly);
-    }
-    if (minutes >= 60 * 24) {
-        show_dp_rates(ds, "\tdaily avg:",  &backer->daily);
-    }
-    show_dp_rates(ds, "\toverall avg:",  &backer->lifetime);
-
     shash_init(&ofproto_shash);
     ofprotos = get_ofprotos(&ofproto_shash);
     for (i = 0; i < shash_count(&ofproto_shash); i++) {
@@ -5884,7 +5754,7 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
         }
 
         odp_flow_format(subfacet->key, subfacet->key_len,
-                        mask.data, mask.size, &ds, false);
+                        mask.data, mask.size, NULL, &ds, false);
 
         ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:",
                       subfacet->dp_packet_count, subfacet->dp_byte_count);
@@ -6230,51 +6100,6 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
     }
 }
 
-/* Compute exponentially weighted moving average, adding 'new' as the newest,
- * most heavily weighted element.  'base' designates the rate of decay: after
- * 'base' further updates, 'new''s weight in the EWMA decays to about 1/e
- * (about .37). */
-static void
-exp_mavg(double *avg, int base, double new)
-{
-    *avg = (*avg * (base - 1) + new) / base;
-}
-
-static void
-update_moving_averages(struct dpif_backer *backer)
-{
-    const int min_ms = 60 * 1000; /* milliseconds in one minute. */
-    long long int minutes = (time_msec() - backer->created) / min_ms;
-
-    if (minutes > 0) {
-        backer->lifetime.add_rate = (double) backer->total_subfacet_add_count
-            / minutes;
-        backer->lifetime.del_rate = (double) backer->total_subfacet_del_count
-            / minutes;
-    } else {
-        backer->lifetime.add_rate = 0.0;
-        backer->lifetime.del_rate = 0.0;
-    }
-
-    /* Update hourly averages on the minute boundaries. */
-    if (time_msec() - backer->last_minute >= min_ms) {
-        exp_mavg(&backer->hourly.add_rate, 60, backer->subfacet_add_count);
-        exp_mavg(&backer->hourly.del_rate, 60, backer->subfacet_del_count);
-
-        /* Update daily averages on the hour boundaries. */
-        if ((backer->last_minute - backer->created) / min_ms % 60 == 59) {
-            exp_mavg(&backer->daily.add_rate, 24, backer->hourly.add_rate);
-            exp_mavg(&backer->daily.del_rate, 24, backer->hourly.del_rate);
-        }
-
-        backer->total_subfacet_add_count += backer->subfacet_add_count;
-        backer->total_subfacet_del_count += backer->subfacet_del_count;
-        backer->subfacet_add_count = 0;
-        backer->subfacet_del_count = 0;
-        backer->last_minute += min_ms;
-    }
-}
-
 const struct ofproto_class ofproto_dpif_class = {
     init,
     enumerate_types,
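
The removed exp_mavg() above is a plain exponentially weighted moving average: each update keeps a (base-1)/base fraction of the old average and blends in 1/base of the new sample, so after 'base' further updates a sample's weight has decayed to roughly 1/e, as the deleted comment notes.  These were the subfacet add/del rates that dpif/show no longer prints (see the test updates further below).  A tiny standalone sketch of the same recurrence (illustration only, not OVS code):

#include <stdio.h>

/* Same recurrence as the removed exp_mavg(). */
static void
ewma_update(double *avg, int base, double sample)
{
    *avg = (*avg * (base - 1) + sample) / base;
}

int
main(void)
{
    double avg = 0.0;
    int base = 60;

    /* Feed a constant sample of 1.0.  After 'base' updates the average
     * reaches about 1 - 1/e (~0.635), i.e. the starting value's weight
     * has decayed to about 1/e (~0.365). */
    for (int i = 0; i < base; i++) {
        ewma_update(&avg, base, 1.0);
    }
    printf("after %d updates: %.3f\n", base, avg);
    return 0;
}
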
index 03b19c8..de566e3 100644 (file)
@@ -81,10 +81,10 @@ struct ofproto {
     /* Datapath. */
     struct hmap ports;          /* Contains "struct ofport"s. */
     struct shash port_by_name;
-    unsigned long *ofp_port_ids;/* Bitmap of used OpenFlow port numbers. */
     struct simap ofp_requests;  /* OpenFlow port number requests. */
     uint16_t alloc_port_no;     /* Last allocated OpenFlow port number. */
     uint16_t max_ports;         /* Max possible OpenFlow port num, plus one. */
+    struct hmap ofport_usage;   /* Map ofport to last used time. */
 
     /* Flow tables. */
     long long int eviction_group_timer; /* For rate limited reheapification. */
index ae39283..8e4f300 100644 (file)
 
 VLOG_DEFINE_THIS_MODULE(ofproto);
 
-COVERAGE_DEFINE(ofproto_error);
 COVERAGE_DEFINE(ofproto_flush);
-COVERAGE_DEFINE(ofproto_no_packet_in);
 COVERAGE_DEFINE(ofproto_packet_out);
 COVERAGE_DEFINE(ofproto_queue_req);
 COVERAGE_DEFINE(ofproto_recv_openflow);
 COVERAGE_DEFINE(ofproto_reinit_ports);
-COVERAGE_DEFINE(ofproto_uninstallable);
 COVERAGE_DEFINE(ofproto_update_port);
 
 enum ofproto_state {
@@ -240,6 +237,22 @@ static void update_port(struct ofproto *, const char *devname);
 static int init_ports(struct ofproto *);
 static void reinit_ports(struct ofproto *);
 
+static long long int ofport_get_usage(const struct ofproto *,
+                                      ofp_port_t ofp_port);
+static void ofport_set_usage(struct ofproto *, ofp_port_t ofp_port,
+                             long long int last_used);
+
+/* Ofport usage.
+ *
+ * Keeps track of the currently used and recently used ofport values and is
+ * used to prevent immediate recycling of ofport values. */
+struct ofport_usage {
+    struct hmap_node hmap_node; /* In struct ofproto's "ofport_usage" hmap. */
+    ofp_port_t ofp_port;        /* OpenFlow port number. */
+    long long int last_used;    /* Last time the 'ofp_port' was used. LLONG_MAX
+                                   represents in-use ofports. */
+};
+
 /* rule. */
 static void ofproto_rule_destroy__(struct rule *);
 static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
@@ -485,6 +498,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     ofproto->dp_desc = NULL;
     ofproto->frag_handling = OFPC_FRAG_NORMAL;
     hmap_init(&ofproto->ports);
+    hmap_init(&ofproto->ofport_usage);
     shash_init(&ofproto->port_by_name);
     simap_init(&ofproto->ofp_requests);
     ofproto->max_ports = ofp_to_u16(OFPP_MAX);
@@ -518,11 +532,6 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
         return error;
     }
 
-    /* The "max_ports" member should have been set by ->construct(ofproto).
-     * Port 0 is not a valid OpenFlow port, so mark that as unavailable. */
-    ofproto->ofp_port_ids = bitmap_allocate(ofproto->max_ports);
-    bitmap_set1(ofproto->ofp_port_ids, 0);
-
     /* Check that hidden tables, if any, are at the end. */
     ovs_assert(ofproto->n_tables);
     for (i = 0; i + 1 < ofproto->n_tables; i++) {
@@ -1227,8 +1236,8 @@ ofproto_destroy__(struct ofproto *ofproto)
     free(ofproto->serial_desc);
     free(ofproto->dp_desc);
     hmap_destroy(&ofproto->ports);
+    hmap_destroy(&ofproto->ofport_usage);
     shash_destroy(&ofproto->port_by_name);
-    bitmap_free(ofproto->ofp_port_ids);
     simap_destroy(&ofproto->ofp_requests);
 
     OFPROTO_FOR_EACH_TABLE (table, ofproto) {
@@ -1248,6 +1257,7 @@ ofproto_destroy(struct ofproto *p)
     OVS_EXCLUDED(ofproto_mutex)
 {
     struct ofport *ofport, *next_ofport;
+    struct ofport_usage *usage, *next_usage;
 
     if (!p) {
         return;
@@ -1265,6 +1275,11 @@ ofproto_destroy(struct ofproto *p)
         ofport_destroy(ofport);
     }
 
+    HMAP_FOR_EACH_SAFE (usage, next_usage, hmap_node, &p->ofport_usage) {
+        hmap_remove(&p->ofport_usage, &usage->hmap_node);
+        free(usage);
+    }
+
     p->ofproto_class->destruct(p);
     ofproto_destroy__(p);
 }
@@ -1941,35 +1956,45 @@ alloc_ofp_port(struct ofproto *ofproto, const char *netdev_name)
     port_idx = port_idx ? port_idx : UINT16_MAX;
 
     if (port_idx >= ofproto->max_ports
-        || bitmap_is_set(ofproto->ofp_port_ids, port_idx)) {
-        uint16_t end_port_no = ofproto->alloc_port_no;
+        || ofport_get_usage(ofproto, u16_to_ofp(port_idx)) == LLONG_MAX) {
+        uint16_t lru_ofport = 0, end_port_no = ofproto->alloc_port_no;
+        long long int last_used_at, lru = LLONG_MAX;
 
         /* Search for a free OpenFlow port number.  We try not to
          * immediately reuse them to prevent problems due to old
          * flows. */
         for (;;) {
             if (++ofproto->alloc_port_no >= ofproto->max_ports) {
-                ofproto->alloc_port_no = 0;
+                ofproto->alloc_port_no = 1;
             }
-            if (!bitmap_is_set(ofproto->ofp_port_ids,
-                               ofproto->alloc_port_no)) {
+            last_used_at = ofport_get_usage(ofproto,
+                                         u16_to_ofp(ofproto->alloc_port_no));
+            if (!last_used_at) {
                 port_idx = ofproto->alloc_port_no;
                 break;
+            } else if (last_used_at < lru) {
+                lru = last_used_at;
+                lru_ofport = ofproto->alloc_port_no;
             }
+
             if (ofproto->alloc_port_no == end_port_no) {
+                if (lru_ofport) {
+                    port_idx = lru_ofport;
+                    break;
+                }
                 return OFPP_NONE;
             }
         }
     }
-    bitmap_set1(ofproto->ofp_port_ids, port_idx);
+    ofport_set_usage(ofproto, u16_to_ofp(port_idx), LLONG_MAX);
     return u16_to_ofp(port_idx);
 }
 
 static void
-dealloc_ofp_port(const struct ofproto *ofproto, ofp_port_t ofp_port)
+dealloc_ofp_port(struct ofproto *ofproto, ofp_port_t ofp_port)
 {
     if (ofp_to_u16(ofp_port) < ofproto->max_ports) {
-        bitmap_set0(ofproto->ofp_port_ids, ofp_to_u16(ofp_port));
+        ofport_set_usage(ofproto, ofp_port, time_msec());
     }
 }
 
@@ -2194,6 +2219,41 @@ ofproto_get_port(const struct ofproto *ofproto, ofp_port_t ofp_port)
     return NULL;
 }
 
+static long long int
+ofport_get_usage(const struct ofproto *ofproto, ofp_port_t ofp_port)
+{
+    struct ofport_usage *usage;
+
+    HMAP_FOR_EACH_IN_BUCKET (usage, hmap_node, hash_ofp_port(ofp_port),
+                             &ofproto->ofport_usage) {
+        if (usage->ofp_port == ofp_port) {
+            return usage->last_used;
+        }
+    }
+    return 0;
+}
+
+static void
+ofport_set_usage(struct ofproto *ofproto, ofp_port_t ofp_port,
+                 long long int last_used)
+{
+    struct ofport_usage *usage;
+    HMAP_FOR_EACH_IN_BUCKET (usage, hmap_node, hash_ofp_port(ofp_port),
+                             &ofproto->ofport_usage) {
+        if (usage->ofp_port == ofp_port) {
+            usage->last_used = last_used;
+            return;
+        }
+    }
+    ovs_assert(last_used == LLONG_MAX);
+
+    usage = xmalloc(sizeof *usage);
+    usage->ofp_port = ofp_port;
+    usage->last_used = last_used;
+    hmap_insert(&ofproto->ofport_usage, &usage->hmap_node,
+                hash_ofp_port(ofp_port));
+}
+
 int
 ofproto_port_get_stats(const struct ofport *port, struct netdev_stats *stats)
 {
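
The ofproto.c hunks above replace the old ofp_port_ids bitmap with the ofport_usage hmap: a freed OpenFlow port number now records the time it was released (LLONG_MAX marks ports still in use), and alloc_ofp_port() prefers numbers that have never been used, falling back to the least recently used one only when the space is exhausted.  A much-simplified standalone sketch of that policy over a small fixed-size array (hypothetical helper names, not the ofproto code itself):

#include <limits.h>
#include <stdio.h>

#define MAX_PORTS 16

/* last_used[i] == 0         -> port i never allocated,
 * last_used[i] == LLONG_MAX -> port i currently in use,
 * anything else             -> time (ms) at which port i was freed. */
static long long int last_used[MAX_PORTS];

/* Prefers a never-used port; otherwise falls back to the least recently
 * freed one.  Returns -1 only if every port is currently in use. */
static int
pick_port(void)
{
    int lru_port = -1;
    long long int lru = LLONG_MAX;

    for (int port = 1; port < MAX_PORTS; port++) {  /* Port 0 is invalid. */
        if (!last_used[port]) {
            last_used[port] = LLONG_MAX;
            return port;
        } else if (last_used[port] < lru) {         /* Skips in-use ports. */
            lru = last_used[port];
            lru_port = port;
        }
    }
    if (lru_port >= 0) {
        last_used[lru_port] = LLONG_MAX;
        return lru_port;
    }
    return -1;
}

/* Record the release time instead of clearing the slot, so that
 * pick_port() avoids handing the same number out again right away. */
static void
release_port(int port, long long int now_msec)
{
    last_used[port] = now_msec;
}

int
main(void)
{
    int a = pick_port();        /* 1: never used. */
    release_port(a, 1000);
    int b = pick_port();        /* 2, not 1: immediate reuse is avoided. */
    printf("first=%d second=%d\n", a, b);
    return 0;
}
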
index 5bdb974..5656b9d 100644 (file)
@@ -316,7 +316,9 @@ over HTTP, for these reasons:
 
     * The JSON-RPC specification for HTTP transport is incomplete.
 
-We are using TCP port 6632 for the database JSON-RPC connection.
+We are currently using TCP port 6632 for the database JSON-RPC
+connection, but future versions will switch to using IANA-assigned TCP
+port 6640.
 
 The database wire protocol consists of the following JSON-RPC methods:
 
index ea3c3f3..1316025 100644 (file)
@@ -32,9 +32,6 @@
 #include "socket-util.h"
 #include "transaction.h"
 #include "util.h"
-#include "vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(ovsdb_log);
 
 enum ovsdb_log_mode {
     OVSDB_LOG_READ,
index 4628b59..f86e8f3 100644 (file)
@@ -73,6 +73,10 @@ client before sending an inactivity probe message.
 It is an error for \fIcolumn\fR to have another type.
 .RE
 .
+.IP
+To connect or listen on multiple connection methods, use multiple
+\fB\-\-remote\fR options.
+.
 .IP "\fB\-\-run=\fIcommand\fR]"
 Ordinarily \fBovsdb\-server\fR runs forever, or until it is told to
 exit (see \fBRUNTIME MANAGEMENT COMMANDS\fR below).  With this option,
index 077e7f5..11e61e6 100644 (file)
@@ -40,8 +40,6 @@
 #include "util.h"
 #include "vlog.h"
 
-VLOG_DEFINE_THIS_MODULE(ovsdb_tool);
-
 /* -m, --more: Verbosity level for "show-log" command output. */
 static int show_log_verbosity;
 
index 7e64132..2878d39 100755 (executable)
@@ -5,7 +5,7 @@
 # chkconfig: 2345 09 91
 # description: Manage Open vSwitch kernel modules and user-space daemons
 
-# Copyright (C) 2009, 2010, 2011 Nicira, Inc.
+# Copyright (C) 2009, 2010, 2011, 2013 Nicira, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -81,6 +81,7 @@ case $1 in
         ;;
     status)
         ovs_ctl status
+        exit $?
         ;;
     version)
         ovs_ctl version
index f67c3ab..b706373 100644 (file)
@@ -268,6 +268,7 @@ cookie=0x8 table=6 in_port=85 actions=mod_tp_src:85,controller,resubmit(86,7)
 cookie=0x9 table=7 in_port=86 actions=mod_tp_dst:86,controller,controller
 cookie=0xa dl_src=40:44:44:44:44:41 actions=mod_vlan_vid:99,mod_vlan_pcp:1,controller
 cookie=0xa dl_src=40:44:44:44:44:42 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=41:44:44:44:44:42 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],pop_mpls:0x0800,controller
 cookie=0xa dl_src=40:44:44:44:44:43 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
 cookie=0xa dl_src=40:44:44:44:44:44 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
 cookie=0xa dl_src=40:44:44:44:44:45 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,controller
@@ -383,6 +384,26 @@ mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:42,dl_dst=50:54:
 dnl Modified MPLS controller action.
 AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
 
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=41:44:44:44:44:42,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
+tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
+tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
+tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0
+])
+
+dnl Modified MPLS controller action.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
 dnl in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=3,ttl=64,bos=1)
 
 for i in 1 2 3; do
@@ -703,6 +724,7 @@ AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
  cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:46 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),CONTROLLER:65535
  cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:47 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,set_mpls_ttl(10),CONTROLLER:65535
  cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:48 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),dec_mpls_ttl,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=41:44:44:44:44:42 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],pop_mpls:0x0800,CONTROLLER:65535
  cookie=0xb, n_packets=3, n_bytes=180, mpls,dl_src=50:55:55:55:55:55 actions=load:0x3e8->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535
  cookie=0xc, n_packets=3, n_bytes=180, dl_src=70:77:77:77:77:77 actions=push_mpls:0x8848,load:0x3e8->OXM_OF_MPLS_LABEL[[]],load:0x7->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
  cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:66:66 actions=pop_mpls:0x0800,CONTROLLER:65535
@@ -1139,8 +1161,15 @@ in_port=2 actions=output:1
 ])
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-odp_flow="in_port(1)"
+odp_flow="in_port(p1)"
 br_flow="in_port=1"
+# Test command: ofproto/trace odp_flow with in_port as a name.
+AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [dnl
+Datapath actions: 2
+])
+
+odp_flow="in_port(1)"
 # Test command: ofproto/trace odp_flow
 AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0], [dnl
@@ -1285,7 +1314,7 @@ m4_foreach(
 [AT_CHECK([ovs-appctl ofproto/trace wrong_name "$odp_flow" option],
   [2], [], [stderr])
 AT_CHECK([tail -2 stderr], [0], [dnl
-Cannot find datapath of this name
+Cannot find the datapath
 ovs-appctl: ovs-vswitchd: server returned an error
 ])])
 
@@ -1298,7 +1327,7 @@ m4_foreach(
 [AT_CHECK([ovs-appctl ofproto/trace "" "$odp_flow" option],
   [2], [], [stderr])
 AT_CHECK([tail -2 stderr], [0], [dnl
-Cannot find datapath of this name
+Cannot find the datapath
 ovs-appctl: ovs-vswitchd: server returned an error
 ])])
 
@@ -1311,7 +1340,7 @@ m4_foreach(
 [AT_CHECK([ovs-appctl ofproto/trace ovs-system "$odp_flow" option],
   [2], [], [stderr])
 AT_CHECK([tail -2 stderr], [0], [dnl
-Cannot find datapath of this name
+Cannot find the datapath
 ovs-appctl: ovs-vswitchd: server returned an error
 ])])
 
@@ -1324,7 +1353,7 @@ m4_foreach(
 [AT_CHECK([ovs-appctl ofproto/trace br0 "$odp_flow" option],
   [2], [], [stderr])
 AT_CHECK([tail -2 stderr], [0], [dnl
-Cannot find datapath of this name
+Cannot find the datapath
 ovs-appctl: ovs-vswitchd: server returned an error
 ])])
 
@@ -2108,7 +2137,6 @@ ADD_OF_PORTS([br1], [3])
 AT_CHECK([ovs-appctl dpif/show], [0], [dnl
 dummy@ovs-dummy: hit:0 missed:0
        flows: cur: 0, avg: 0, max: 0, life span: 0ms
-       overall avg: add rate: 0.000/min, del rate: 0.000/min
        br0: hit:0 missed:0
                br0 65534/100: (dummy)
                p1 1/1: (dummy)
@@ -2201,7 +2229,6 @@ warped
 AT_CHECK([ovs-appctl dpif/show], [0], [dnl
 dummy@ovs-dummy: hit:13 missed:2
        flows: cur: 2, avg: 1, max: 2, life span: 1250ms
-       overall avg: add rate: 0.000/min, del rate: 0.000/min
        br0: hit:9 missed:1
                br0 65534/100: (dummy)
                p2 2/2: (dummy)
@@ -2253,8 +2280,6 @@ AT_CHECK([ovs-appctl time/warp 10000], [0], [warped
 AT_CHECK([ovs-appctl dpif/show | sed 's/ 10[[0-9]]\{3\}(ms)$/ 10000(ms)/'], [0], [dnl
 dummy@ovs-dummy: hit:0 missed:61
        flows: cur: 0, avg: 0, max: 1, life span: 1666ms
-       hourly avg: add rate: 0.641/min, del rate: 0.641/min
-       overall avg: add rate: 1.000/min, del rate: 1.000/min
        br0: hit:0 missed:61
                br0 65534/100: (dummy)
                p1 1/1: (dummy)
@@ -2816,3 +2841,101 @@ AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+# Unit test for appctl coverage/show command
+AT_SETUP([ofproto-dpif - coverage/show])
+OVS_VSWITCHD_START
+
+ovs-appctl time/stop
+# Before the first 5 seconds, nothing can be calculated but the total count.
+# Two unixctl commands have been received, but the count of this "appctl
+# coverage/show" command has not yet been added to the total, so only 1 shows.
+AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl
+unixctl_received           0.0/sec     0.000/sec        0.0000/sec   total: 1
+])
+
+ovs-appctl time/warp 5000
+# At the first 5-second time instant, there should be stats.
+AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl
+unixctl_received           0.6/sec     0.050/sec        0.0008/sec   total: 3
+])
+
+for i in `seq 0 10`; do ovs-appctl time/warp 5000; done
+# Advance to the first 1-minute time instant.
+AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl
+unixctl_received           0.2/sec     0.250/sec        0.0042/sec   total: 15
+])
+
+ovs-appctl time/warp 60000
+# Advance directly to the next 1-minute time instant, which should show the
+# per-minute rate dropping.
+AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl
+unixctl_received           0.4/sec     0.033/sec        0.0047/sec   total: 17
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+\f
+AT_BANNER([ofproto-dpif - flow translation resource limits])
+
+AT_SETUP([ofproto-dpif - infinite resubmit])
+OVS_VSWITCHD_START
+AT_CHECK([ovs-ofctl add-flow br0 actions=resubmit:1,resubmit:2,output:3])
+AT_CHECK([ovs-appctl ofproto/trace br0 'eth_dst=ff:ff:ff:ff:ff:ff'],
+  [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [Datapath actions: drop
+])
+AT_CHECK([grep -c 'resubmit actions recursed over 64 times' ovs-vswitchd.log],
+  [0], [1
+])
+OVS_VSWITCHD_STOP(["/resubmit actions recursed/d"])
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - exponential resubmit chain])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], 1)
+(for i in `seq 1 64`; do
+     j=`expr $i + 1`
+     echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local"
+ done
+ echo "in_port=65, actions=local") > flows
+ AT_CHECK([ovs-ofctl add-flows br0 flows])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout])
+AT_CHECK([grep -c 'over 4096 resubmit actions' ovs-vswitchd.log], [0], [1
+])
+OVS_VSWITCHD_STOP(["/over.*resubmit actions/d"])
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - too many output actions])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], 1)
+(for i in `seq 1 12`; do
+     j=`expr $i + 1`
+     echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local"
+ done
+ echo "in_port=13, actions=local,local,local,local,local,local,local,local") > flows
+AT_CHECK([ovs-ofctl add-flows br0 flows])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout])
+AT_CHECK([grep -c 'resubmits yielded over 64 kB of actions' ovs-vswitchd.log], [0], [1
+])
+AT_CHECK([grep -c 'discarding oversize datapath actions' ovs-vswitchd.log], [0], [1
+])
+OVS_VSWITCHD_STOP(["/resubmits yielded over 64 kB of actions/d
+/discarding oversize datapath actions/d"])
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - stack too deep])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], 1)
+(for i in `seq 1 12`; do
+     j=`expr $i + 1`
+     echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local"
+ done
+ push="push:NXM_NX_REG0[[]]"
+ echo "in_port=13, actions=$push,$push,$push,$push,$push,$push,$push,$push") > flows
+ AT_CHECK([ovs-ofctl add-flows br0 flows])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout])
+AT_CHECK([grep -c 'resubmits yielded over 64 kB of stack' ovs-vswitchd.log], [0], [1
+])
+OVS_VSWITCHD_STOP(["/resubmits yielded over 64 kB of stack/d"])
+AT_CLEANUP
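
The tests above pin down the translation resource limits that ovs-vswitchd now logs: resubmit recursion deeper than 64 levels, more than 4096 resubmit actions in one translation, and resubmits that yield over 64 kB of datapath actions or of stack.  As a rough illustration of the guard pattern only (hypothetical names; the real checks live in the xlate code), a self-resubmitting flow stops like this:

#include <stdbool.h>
#include <stdio.h>

#define MAX_RESUBMIT_DEPTH 64    /* "resubmit actions recursed over 64 times" */
#define MAX_RESUBMITS      4096  /* "over 4096 resubmit actions" */

struct xlate_sketch {
    int depth;        /* Current resubmit nesting level. */
    int resubmits;    /* Total resubmit actions in this translation. */
    bool exceeded;    /* Set once a limit trips; translation then stops. */
};

/* Toy guarded resubmit: the "flow" translated here just resubmits to
 * itself, like the infinite-resubmit test, so without the guards this
 * would recurse forever.  Byte budgets for emitted actions and stack
 * would be checked the same way. */
static void
do_resubmit(struct xlate_sketch *ctx)
{
    if (ctx->depth >= MAX_RESUBMIT_DEPTH || ctx->resubmits >= MAX_RESUBMITS) {
        ctx->exceeded = true;
        return;
    }
    ctx->resubmits++;
    ctx->depth++;
    do_resubmit(ctx);            /* Translate the resubmitted-to flow. */
    ctx->depth--;
}

int
main(void)
{
    struct xlate_sketch ctx = { 0, 0, false };

    do_resubmit(&ctx);
    printf("hit limit: %s after %d resubmits\n",
           ctx.exceeded ? "yes" : "no", ctx.resubmits);   /* yes, 64. */
    return 0;
}
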
index 839d41f..3bcffc2 100644 (file)
@@ -89,6 +89,10 @@ m4_define([OVS_VSWITCHD_START],
 m4_divert_push([PREPARE_TESTS])
 check_logs () {
     sed -n "$1
+/timeval.*Unreasonably long [[0-9]]*ms poll interval/d
+/timeval.*faults: [[0-9]]* minor, [[0-9]]* major/d
+/timeval.*disk: [[0-9]]* reads, [[0-9]]* writes/d
+/timeval.*context switches: [[0-9]]* voluntary, [[0-9]]* involuntary/d
 /|WARN|/p
 /|ERR|/p
 /|EMER|/p" ovs-vswitchd.log ovsdb-server.log
index 41e2e38..1bb2b0b 100644 (file)
@@ -141,8 +141,6 @@ main(int argc, char *argv[])
     flows = xmalloc(N_FLOWS * sizeof *flows);
     for (i = 0; i < N_FLOWS; i++) {
         random_bytes(&flows[i], sizeof flows[i]);
-        memset(flows[i].zeros, 0, sizeof flows[i].zeros);
-        flows[i].mpls_depth = 0;
         flows[i].regs[0] = ofp_to_u16(OFPP_NONE);
     }
 
index f1b12e2..4ba3692 100644 (file)
@@ -66,8 +66,6 @@ main(int argc, char *argv[])
             struct flow flow;
 
             random_bytes(&flow, sizeof flow);
-            memset(flow.zeros, 0, sizeof flow.zeros);
-            flow.mpls_depth = 0;
 
             mp.max_link = n - 1;
             multipath_execute(&mp, &flow, &wc);
index 45605e4..183a3b3 100644 (file)
@@ -86,7 +86,7 @@ parse_keys(bool wc_keys)
         ds_init(&out);
         if (wc_keys) {
             odp_flow_format(odp_key.data, odp_key.size,
-                            odp_mask.data, odp_mask.size, &out, false);
+                            odp_mask.data, odp_mask.size, NULL, &out, false);
         } else {
             odp_flow_key_format(odp_key.data, odp_key.size, &out);
         }
index 697c217..982d22a 100644 (file)
@@ -14,7 +14,7 @@ actions=IN_PORT
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: remote_ip=1.1.1.1)
                p2 2/1: (gre: local_ip=2.2.2.2, remote_ip=1.1.1.1)
@@ -37,7 +37,7 @@ dnl reconfigure, local_ip, remote_ip
 AT_CHECK([ovs-vsctl set Interface p2 type=gre options:local_ip=2.2.2.3 \
           options:df_default=false options:ttl=1 options:csum=true \
           -- set Interface p3 type=gre64])
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: remote_ip=1.1.1.1)
                p2 2/1: (gre: csum=true, df_default=false, local_ip=2.2.2.3, remote_ip=1.1.1.1, ttl=1)
@@ -72,7 +72,7 @@ actions=2
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: remote_ip=1.1.1.1)
                p2 2/2: (dummy)
@@ -116,7 +116,7 @@ actions=output:1
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: key=5, local_ip=2.2.2.2, remote_ip=1.1.1.1)
                p2 2/2: (dummy)
@@ -148,7 +148,7 @@ actions=output:1
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: remote_ip=1.1.1.1, tos=inherit, ttl=inherit)
                p2 2/2: (dummy)
@@ -190,7 +190,7 @@ actions=set_tunnel:1,output:1,set_tunnel:2,output:2,set_tunnel:3,output:3,set_tu
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: key=flow, remote_ip=1.1.1.1)
                p2 2/1: (gre: key=flow, remote_ip=2.2.2.2)
@@ -222,7 +222,7 @@ actions=IN_PORT,output:1,output:2,output:3
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: key=1, remote_ip=1.1.1.1)
                p2 2/1: (gre: in_key=2, out_key=3, remote_ip=1.1.1.1)
@@ -274,7 +274,7 @@ tun_id=4,actions=output:5
 
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (gre: key=flow, remote_ip=1.1.1.1)
                p2 2/1: (gre: key=3, remote_ip=3.3.3.3)
@@ -310,7 +310,7 @@ AT_SETUP([tunnel - VXLAN])
 OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
                     options:remote_ip=1.1.1.1 ofport_request=1])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (vxlan: remote_ip=1.1.1.1)
 ])
@@ -322,7 +322,7 @@ AT_SETUP([tunnel - LISP])
 OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=lisp \
                     options:remote_ip=1.1.1.1 ofport_request=1])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (lisp: remote_ip=1.1.1.1)
 ])
@@ -334,7 +334,7 @@ AT_SETUP([tunnel - different VXLAN UDP port])
 OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
                     options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=4341])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (vxlan: dst_port=4341, remote_ip=1.1.1.1)
 ])
@@ -343,7 +343,7 @@ dnl change UDP port
 
 AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=5000])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/2: (vxlan: dst_port=5000, remote_ip=1.1.1.1)
 ])
@@ -352,7 +352,7 @@ dnl change UDP port to default
 
 AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=4789])
 
-AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
                br0 65534/100: (dummy)
                p1 1/1: (vxlan: remote_ip=1.1.1.1)
 ])
index 5c47061..b1ac9a5 100644 (file)
          else if (length > 0 && (sport == LDP_PORT || dport == LDP_PORT)) {
                  ldp_print(bp, length);
 -        }
-+        } else if (sport == OFP_TCP_PORT || dport == OFP_TCP_PORT) {
++        } else if (sport == OFP_OLD_PORT || dport == OFP_OLD_PORT) {
 +                   openflow_print(bp, length);
 +        }
  
index b528dba..6f4e0b5 100644 (file)
@@ -33,9 +33,9 @@ Print verbose debugging output.
 Use the capabilities specified in a comma-separated list.
 .
 .IP "\fB\-\-log\-days=\fIdays\fR"
-Include the logs rotated in the previous \fIdays\fR days in the debug bundle.
-The number of log files included has a big impact on the eventual bundle size.
-The default value is 20 days.
+Include the logs whose last modification time falls within the previous
+\fIdays\fR days in the debug bundle.  The number of log files included has a
+big impact on the eventual bundle size.  The default value is 20 days.
 .
 .IP "\fB\-\-output=\fIfiletype\fR"
 Generate a debug bundle with the specified file type.  Options include
@@ -48,7 +48,8 @@ Use only Open vSwitch relevant capabilities.
 Suppress output.
 .
 .IP "\fB\-\-unlimited\fR"
-Do not exclude files which are too large.
+Do not exclude files that are too large, and skip the free disk space check.
+By default, up to 90 percent of the free disk space can be used.
 .
 .IP "\fB\-\-yestoall\fR"
 Answer yes to all prompts.
index 61c21db..35b8aef 100755 (executable)
@@ -207,7 +207,7 @@ CAP_MULTIPATH            = 'multipath'
 CAP_NETWORK_CONFIG       = 'network-config'
 CAP_NETWORK_INFO         = 'network-info'
 CAP_NETWORK_STATUS       = 'network-status'
-CAP_OPENVSWITCH_LOGS    = 'ovs-system-logs'
+CAP_OPENVSWITCH_LOGS     = 'ovs-system-logs'
 CAP_PROCESS_LIST         = 'process-list'
 CAP_SYSTEM_LOGS          = 'system-logs'
 CAP_SYSTEM_SERVICES      = 'system-services'
@@ -220,8 +220,10 @@ caps = {}
 cap_sizes = {}
 unlimited_data = False
 dbg = False
-# Default value for the number of rotated logs.
+# Default value for the number of days to collect logs.
 log_days = 20
+log_last_mod_time = None
+free_disk_space = None
 
 def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1,
         max_time=-1, mime=MIME_TEXT, checked=True, hidden=False):
@@ -284,7 +286,8 @@ def cmd_output(cap, args, label=None, filter=None, binary=False):
         data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter,
                        'binary': binary}
 
-def file_output(cap, path_list, newest_first=False):
+
+def file_output(cap, path_list, newest_first=False, last_mod_time=None):
     """
     If newest_first is True, the list of files in path_list is sorted
     by file modification time in descending order, else its sorted
@@ -297,19 +300,18 @@ def file_output(cap, path_list, newest_first=False):
                 s = os.stat(path)
             except OSError, e:
                 continue
-            path_entries.append((path, s))
+            if last_mod_time is None or s.st_mtime >= last_mod_time:
+                path_entries.append((path, s))
 
         mtime = lambda(path, stat): stat.st_mtime
         path_entries.sort(key=mtime, reverse=newest_first)
         for p in path_entries:
-            if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
-                    cap_sizes[cap] < caps[cap][MAX_SIZE]:
+            if check_space(cap, p[0], p[1].st_size):
                 data[p] = {'cap': cap, 'filename': p[0]}
-                cap_sizes[cap] += p[1].st_size
-            else:
-                output("Omitting %s, size constraint of %s exceeded" % (p[0], cap))
 
-def tree_output(cap, path, pattern=None, negate=False, newest_first=False):
+
+def tree_output(cap, path, pattern=None, negate=False, newest_first=False,
+                last_mod_time=None):
     """
     Walks the directory tree rooted at path. Files in current dir are processed
     before files in sub-dirs.
@@ -319,23 +321,27 @@ def tree_output(cap, path, pattern=None, negate=False, newest_first=False):
             for root, dirs, files in os.walk(path):
                 fns = [fn for fn in [os.path.join(root, f) for f in files]
                        if os.path.isfile(fn) and matches(fn, pattern, negate)]
-                file_output(cap, fns, newest_first=newest_first)
+                file_output(cap, fns, newest_first=newest_first,
+                            last_mod_time=last_mod_time)
+
+
+def prefix_output(cap, prefix, newest_first=False, last_mod_time=None):
+    """
+    Output files with the same prefix.
+    """
+    fns = []
+    for root, dirs, files in os.walk(os.path.dirname(prefix)):
+        fns += [fn for fn in [os.path.join(root, f) for f in files]
+                if fn.startswith(prefix)]
+    file_output(cap, fns, newest_first=newest_first,
+                last_mod_time=last_mod_time)
+
 
 def func_output(cap, label, func):
     if cap in entries:
         t = str(func).split()
         data[label] = {'cap': cap, 'func': func}
 
-def log_output(cap, logs, newest_first=False):
-    global log_days
-    file_output(cap, logs)
-    file_output(cap,
-        ['%s.%d' % (f, n) for n in range(1, log_days+1) for f in logs], \
-        newest_first=newest_first)
-    file_output(cap,
-        ['%s.%d.gz' % (f, n) for n in range(1, log_days+1) for f in logs], \
-        newest_first=newest_first)
-
 def collect_data():
     process_lists = {}
 
@@ -354,12 +360,8 @@ def collect_data():
                 f = open(v['filename'], 'r')
                 s = f.read()
                 f.close()
-                if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
-                        cap_sizes[cap] < caps[cap][MAX_SIZE]:
+                if check_space(cap, v['filename'], len(s)):
                     v['output'] = StringIOmtime(s)
-                    cap_sizes[cap] += len(s)
-                else:
-                    output("Omitting %s, size constraint of %s exceeded" % (v['filename'], cap))
             except:
                 pass
         elif v.has_key('func'):
@@ -367,19 +369,16 @@ def collect_data():
                 s = v['func'](cap)
             except Exception, e:
                 s = str(e)
-            if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
-                    cap_sizes[cap] < caps[cap][MAX_SIZE]:
+            if check_space(cap, k, len(s)):
                 v['output'] = StringIOmtime(s)
-                cap_sizes[cap] += len(s)
-            else:
-                output("Omitting %s, size constraint of %s exceeded" % (k, cap))
 
     run_procs(process_lists.values())
 
 
 def main(argv=None):
     global ANSWER_YES_TO_ALL, SILENT_MODE
-    global entries, data, dbg, unlimited_data, log_days
+    global entries, data, dbg, unlimited_data, free_disk_space
+    global log_days, log_last_mod_time
 
     # Filter flags
     only_ovs_info = False
@@ -466,6 +465,7 @@ def main(argv=None):
         if k == '--log-days':
             log_days = int(v)
 
+
     if len(params) != 1:
         print >>sys.stderr, "Invalid additional arguments", str(params)
         return 2
@@ -478,6 +478,11 @@ def main(argv=None):
         print >>sys.stderr, "Cannot set both '--outfd' and '--outfile'"
         return 2
 
+    if output_file is not None and not unlimited_data:
+        free_disk_space = get_free_disk_space(output_file) * 90 / 100
+
+    log_last_mod_time = int(time.time()) - log_days * 86400
+
     if ANSWER_YES_TO_ALL:
         output("Warning: '--yestoall' argument provided, will not prompt for individual files.")
 
@@ -590,11 +595,14 @@ exclude those logs from the archive.
     system_logs = ([ VAR_LOG_DIR + x for x in
         ['crit.log', 'kern.log', 'daemon.log', 'user.log',
         'syslog', 'messages', 'secure', 'debug', 'dmesg', 'boot']])
+    for log in system_logs:
+        prefix_output(CAP_SYSTEM_LOGS, log, last_mod_time=log_last_mod_time)
+
     ovs_logs = ([ OPENVSWITCH_LOG_DIR + x for x in
         ['ovs-vswitchd.log', 'ovsdb-server.log',
         'ovs-xapi-sync.log', 'ovs-monitor-ipsec.log', 'ovs-ctl.log']])
-    log_output(CAP_SYSTEM_LOGS, system_logs)
-    log_output(CAP_OPENVSWITCH_LOGS, ovs_logs)
+    for log in ovs_logs:
+        prefix_output(CAP_OPENVSWITCH_LOGS, log, last_mod_time=log_last_mod_time)
 
     if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'):
         cmd_output(CAP_SYSTEM_LOGS, [DMESG])
@@ -813,6 +821,7 @@ def dump_rdac_groups(cap):
                 cmd_output(cap, [MPPUTIL, '-g', group])
 
 def load_plugins(just_capabilities=False, filter=None):
+    global log_last_mod_time
     def getText(nodelist):
         rc = ""
         for node in nodelist:
@@ -873,8 +882,9 @@ def load_plugins(just_capabilities=False, filter=None):
                 if el.tagName == "files":
                     newest_first = getBoolAttr(el, 'newest_first')
                     if el.getAttribute("type") == "logs":
-                        log_output(dir, getText(el.childNodes).split(),
-                                    newest_first=newest_first)
+                        for fn in getText(el.childNodes).split():
+                            prefix_output(dir, fn, newest_first=newest_first,
+                                          last_mod_time=log_last_mod_time)
                     else:
                         file_output(dir, getText(el.childNodes).split(),
                                     newest_first=newest_first)
@@ -883,9 +893,15 @@ def load_plugins(just_capabilities=False, filter=None):
                     if pattern == '': pattern = None
                     negate = getBoolAttr(el, 'negate')
                     newest_first = getBoolAttr(el, 'newest_first')
-                    tree_output(dir, getText(el.childNodes),
-                                pattern and re.compile(pattern) or None,
-                                negate=negate, newest_first=newest_first)
+                    if el.getAttribute("type") == "logs":
+                        tree_output(dir, getText(el.childNodes),
+                                    pattern and re.compile(pattern) or None,
+                                    negate=negate, newest_first=newest_first,
+                                    last_mod_time=log_last_mod_time)
+                    else:
+                        tree_output(dir, getText(el.childNodes),
+                                    pattern and re.compile(pattern) or None,
+                                    negate=negate, newest_first=newest_first)
                 elif el.tagName == "command":
                     label = el.getAttribute("label")
                     if label == '': label = None
@@ -1248,6 +1264,31 @@ def pidof(name):
     return pids
 
 
+def check_space(cap, name, size):
+    global free_disk_space
+    if free_disk_space is not None and size > free_disk_space:
+        output("Omitting %s, out of disk space (requested: %u, allowed: %u)" %
+               (name, size, free_disk_space))
+        return False
+    elif unlimited_data or caps[cap][MAX_SIZE] == -1 or \
+             cap_sizes[cap] < caps[cap][MAX_SIZE]:
+        cap_sizes[cap] += size
+        if free_disk_space is not None:
+            free_disk_space -= size
+        return True
+    else:
+        output("Omitting %s, size constraint of %s exceeded" % (name, cap))
+        return False
+
+
+def get_free_disk_space(path):
+    path = os.path.abspath(path)
+    while not os.path.exists(path):
+        path = os.path.dirname(path)
+    s = os.statvfs(path)
+    return s.f_frsize * s.f_bfree
+
+
 class StringIOmtime(StringIO.StringIO):
     def __init__(self, buf=''):
         StringIO.StringIO.__init__(self, buf)
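
The new check_space()/get_free_disk_space() helpers above budget the collected data against 90 percent of the free space on the filesystem holding the output file.  For reference, the same free-space figure computed via POSIX statvfs() in C (a sketch mirroring the Python; the path and budget here are only examples):

#include <stdio.h>
#include <sys/statvfs.h>

/* Free bytes on the filesystem containing 'path': the same
 * f_frsize * f_bfree product that get_free_disk_space() uses. */
static unsigned long long
free_disk_space(const char *path)
{
    struct statvfs s;

    if (statvfs(path, &s) < 0) {
        return 0;
    }
    return (unsigned long long) s.f_frsize * s.f_bfree;
}

int
main(void)
{
    /* The bugtool caps the bundle at 90% of this value unless
     * --unlimited is given. */
    unsigned long long budget = free_disk_space("/tmp") * 90 / 100;

    printf("budget: %llu bytes\n", budget);
    return 0;
}
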
index 162c585..f29de6a 100644 (file)
@@ -148,6 +148,9 @@ To bind locally to port 6633 (the default) and wait for incoming
 connections from OpenFlow switches:
 .IP
 \fB% ovs\-controller ptcp:\fR
+.PP
+In the future, the default port number will change to 6653, which is the
+IANA-defined value.
 .SH "BUGS"
 .PP
 Configuring a Citrix XenServer to connect to a particular controller
index 98b47b8..4fb02dd 100644 (file)
@@ -49,8 +49,6 @@
 #include "util.h"
 #include "vlog.h"
 
-VLOG_DEFINE_THIS_MODULE(dpctl);
-
 /* -s, --statistics: Print port/flow statistics? */
 static bool print_statistics;
 
@@ -742,9 +740,12 @@ dpctl_dump_flows(int argc, char *argv[])
 {
     const struct dpif_flow_stats *stats;
     const struct nlattr *actions;
-    struct dpif_flow_dump dump;
+    struct dpif_flow_dump flow_dump;
     const struct nlattr *key;
     const struct nlattr *mask;
+    struct dpif_port dpif_port;
+    struct dpif_port_dump port_dump;
+    struct hmap portno_names;
     size_t actions_len;
     struct dpif *dpif;
     size_t key_len;
@@ -756,13 +757,19 @@ dpctl_dump_flows(int argc, char *argv[])
     run(parsed_dpif_open(name, false, &dpif), "opening datapath");
     free(name);
 
+    hmap_init(&portno_names);
+    DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
+        odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
+    }
+
     ds_init(&ds);
-    dpif_flow_dump_start(&dump, dpif);
-    while (dpif_flow_dump_next(&dump, &key, &key_len,
+    dpif_flow_dump_start(&flow_dump, dpif);
+    while (dpif_flow_dump_next(&flow_dump, &key, &key_len,
                                &mask, &mask_len,
                                &actions, &actions_len, &stats)) {
         ds_clear(&ds);
-        odp_flow_format(key, key_len, mask, mask_len, &ds, verbosity);
+        odp_flow_format(key, key_len, mask, mask_len, &portno_names, &ds,
+                        verbosity);
         ds_put_cstr(&ds, ", ");
 
         dpif_flow_stats_format(stats, &ds);
@@ -770,7 +777,9 @@ dpctl_dump_flows(int argc, char *argv[])
         format_odp_actions(&ds, actions, actions_len);
         printf("%s\n", ds_cstr(&ds));
     }
-    dpif_flow_dump_done(&dump);
+    dpif_flow_dump_done(&flow_dump);
+    odp_portno_names_destroy(&portno_names);
+    hmap_destroy(&portno_names);
     ds_destroy(&ds);
     dpif_close(dpif);
 }
@@ -781,25 +790,37 @@ dpctl_put_flow(int argc, char *argv[], enum dpif_flow_put_flags flags)
     const char *key_s = argv[argc - 2];
     const char *actions_s = argv[argc - 1];
     struct dpif_flow_stats stats;
+    struct dpif_port dpif_port;
+    struct dpif_port_dump port_dump;
     struct ofpbuf actions;
     struct ofpbuf key;
     struct ofpbuf mask;
     struct dpif *dpif;
     struct ds s;
     char *dp_name;
+    struct simap port_names;
+
+    dp_name = argc == 4 ? xstrdup(argv[1]) : get_one_dp();
+    run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath");
+    free(dp_name);
+
+
+    simap_init(&port_names);
+    DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
+        simap_put(&port_names, dpif_port.name, odp_to_u32(dpif_port.port_no));
+    }
 
     ds_init(&s);
     ofpbuf_init(&key, 0);
     ofpbuf_init(&mask, 0);
-    run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key");
+    run(odp_flow_from_string(key_s, &port_names, &key, &mask),
+        "parsing flow key");
+
+    simap_destroy(&port_names);
 
     ofpbuf_init(&actions, 0);
     run(odp_actions_from_string(actions_s, NULL, &actions), "parsing actions");
 
-    dp_name = argc == 4 ? xstrdup(argv[1]) : get_one_dp();
-    run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath");
-    free(dp_name);
-
     run(dpif_flow_put(dpif, flags,
                       key.data, key.size,
                       mask.size == 0 ? NULL : mask.data, mask.size,
@@ -848,23 +869,32 @@ dpctl_del_flow(int argc, char *argv[])
 {
     const char *key_s = argv[argc - 1];
     struct dpif_flow_stats stats;
+    struct dpif_port dpif_port;
+    struct dpif_port_dump port_dump;
     struct ofpbuf key;
     struct ofpbuf mask; /* To be ignored. */
     struct dpif *dpif;
     char *dp_name;
-
-    ofpbuf_init(&key, 0);
-    ofpbuf_init(&mask, 0);
-    run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key");
+    struct simap port_names;
 
     dp_name = argc == 3 ? xstrdup(argv[1]) : get_one_dp();
     run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath");
     free(dp_name);
 
+    simap_init(&port_names);
+    DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
+        simap_put(&port_names, dpif_port.name, odp_to_u32(dpif_port.port_no));
+    }
+
+    ofpbuf_init(&key, 0);
+    ofpbuf_init(&mask, 0);
+    run(odp_flow_from_string(key_s, &port_names, &key, &mask), "parsing flow key");
+
     run(dpif_flow_del(dpif,
                       key.data, key.size,
                       print_statistics ? &stats : NULL), "deleting flow");
 
+    simap_destroy(&port_names);
     ofpbuf_uninit(&key);
     ofpbuf_uninit(&mask);
 
@@ -1051,7 +1081,7 @@ dpctl_normalize_actions(int argc, char *argv[])
         "odp_flow_key_from_string");
 
     ds_clear(&s);
-    odp_flow_format(keybuf.data, keybuf.size, NULL, 0, &s, verbosity);
+    odp_flow_format(keybuf.data, keybuf.size, NULL, 0, NULL, &s, verbosity);
     printf("input flow: %s\n", ds_cstr(&s));
 
     run(odp_flow_key_to_flow(keybuf.data, keybuf.size, &flow),
@@ -1112,7 +1142,7 @@ dpctl_normalize_actions(int argc, char *argv[])
             printf("no vlan: ");
         }
 
-        if (af->flow.mpls_depth) {
+        if (eth_type_mpls(af->flow.dl_type)) {
             printf("mpls(label=%"PRIu32",tc=%d,ttl=%d): ",
                    mpls_lse_to_label(af->flow.mpls_lse),
                    mpls_lse_to_tc(af->flow.mpls_lse),
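
For context, the ovs-dpctl changes above walk the datapath's ports with DPIF_PORT_FOR_EACH and record each name in a simap before calling odp_flow_from_string(), which is what lets flow keys refer to ports by name (in_port(p1)) as well as by number.  A rough standalone sketch of that name-to-number resolution with a hard-coded table (hypothetical helper, not the odp-util parser):

#include <stdio.h>
#include <string.h>

/* Stand-in for the name map that ovs-dpctl builds from the datapath. */
static const struct {
    const char *name;
    unsigned int port_no;
} port_names[] = {
    { "br0", 100 },
    { "p1", 1 },
    { "p2", 2 },
};

/* Resolve an in_port() argument that may be either a port name or a
 * plain number.  Returns 0 on success. */
static int
resolve_port(const char *s, unsigned int *port_no)
{
    for (size_t i = 0; i < sizeof port_names / sizeof *port_names; i++) {
        if (!strcmp(port_names[i].name, s)) {
            *port_no = port_names[i].port_no;
            return 0;
        }
    }
    return sscanf(s, "%u", port_no) == 1 ? 0 : -1;
}

int
main(void)
{
    unsigned int port_no;

    if (!resolve_port("p1", &port_no)) {
        printf("in_port(%u)\n", port_no);   /* Prints in_port(1). */
    }
    return 0;
}
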
index 526e12c..c43b48c 100644 (file)
@@ -1068,21 +1068,17 @@ from the IP TTL (64 if the packet is not IP).
 If the packet does already contain an MPLS label, pushes a new
 outermost label as a copy of the existing outermost label.
 .IP
-There are some limitations in the implementation.  \fBpush_mpls\fR
-followed by another \fBpush_mpls\fR will result in the first
-\fBpush_mpls\fR being discarded.
+A limitation of the implementation is that processing of actions will stop
+if \fBpush_mpls\fR follows another \fBpush_mpls\fR unless there is a
+\fBpop_mpls\fR in between.
 .
 .IP \fBpop_mpls\fR:\fIethertype\fR
 Strips the outermost MPLS label stack entry.
 Currently the implementation restricts \fIethertype\fR to a non-MPLS Ethertype
 and thus \fBpop_mpls\fR should only be applied to packets with
-an MPLS label stack depth of one.
-.
-.IP
-There are some limitations in the implementation.  \fBpop_mpls\fR
-followed by another \fBpush_mpls\fR without an intermediate
-\fBpush_mpls\fR will result in the first \fBpush_mpls\fR being
-discarded.
+an MPLS label stack depth of one. A further limitation is that processing of
+actions will stop if \fBpop_mpls\fR follows another \fBpop_mpls\fR unless
+there is a \fBpush_mpls\fR in between.
 .
 .IP \fBmod_dl_src\fB:\fImac\fR
 Sets the source Ethernet address to \fImac\fR.
@@ -1514,7 +1510,7 @@ and \fBdel\-flows\fR commands support one additional optional field:
 .TP
 \fBout_port=\fIport\fR
 If set, a matching flow must include an output action to \fIport\fR,
-which must an OpenFlow port number or name (e.g. \fBlocal\fR).
+which must be an OpenFlow port number or name (e.g. \fBlocal\fR).
 .
 .SS "Table Entry Output"
 .
index da2dc42..ec3633c 100644 (file)
@@ -473,10 +473,9 @@ collect_in_band_managers(const struct ovsrec_open_vswitch *ovs_cfg,
         SSET_FOR_EACH (target, &targets) {
             struct sockaddr_in *sin = &managers[n_managers];
 
-            if (stream_parse_target_with_default_ports(target,
-                                                       JSONRPC_TCP_PORT,
-                                                       JSONRPC_SSL_PORT,
-                                                       sin)) {
+            if (stream_parse_target_with_default_port(target,
+                                                      OVSDB_OLD_PORT,
+                                                      sin)) {
                 n_managers++;
             }
         }
index 5fd5b3b..c12fd8f 100644 (file)
         <dl>
           <dt><code>ssl:<var>ip</var></code>[<code>:<var>port</var></code>]</dt>
           <dd>
-            <p>The specified SSL <var>port</var> (default: 6633) on the host at
-            the given <var>ip</var>, which must be expressed as an IP address
-            (not a DNS name).  The <ref table="Open_vSwitch" column="ssl"/>
-            column in the <ref table="Open_vSwitch"/> table must point to a
-            valid SSL configuration when this form is used.</p>
+            <p>The specified SSL <var>port</var> on the host at the
+            given <var>ip</var>, which must be expressed as an IP
+            address (not a DNS name).  The <ref table="Open_vSwitch"
+            column="ssl"/> column in the <ref table="Open_vSwitch"/>
+            table must point to a valid SSL configuration when this form
+            is used.</p>
+            <p>If <var>port</var> is not specified, it currently
+            defaults to 6633.  In the future, the default will change to
+            6653, which is the IANA-defined value.</p>
             <p>SSL support is an optional feature that is not always built as
             part of Open vSwitch.</p>
           </dd>
           <dt><code>tcp:<var>ip</var></code>[<code>:<var>port</var></code>]</dt>
-          <dd>The specified TCP <var>port</var> (default: 6633) on the host at
-          the given <var>ip</var>, which must be expressed as an IP address
-          (not a DNS name).</dd>
+          <dd>
+            <p>The specified TCP <var>port</var> on the host at the
+            given <var>ip</var>, which must be expressed as an IP
+            address (not a DNS name).</p>
+            <p>If <var>port</var> is not specified, it currently
+            defaults to 6633.  In the future, the default will change to
+            6653, which is the IANA-defined value.</p>
+          </dd>
         </dl>
         <p>
           The following connection methods are currently supported for service
         <dl>
           <dt><code>pssl:</code>[<var>port</var>][<code>:<var>ip</var></code>]</dt>
           <dd>
-            <p>
-              Listens for SSL connections on the specified TCP <var>port</var>
-              (default: 6633).  If <var>ip</var>, which must be expressed as an
-              IP address (not a DNS name), is specified, then connections are
-              restricted to the specified local IP address.
-            </p>
-            <p>
-              The <ref table="Open_vSwitch" column="ssl"/> column in the <ref
-              table="Open_vSwitch"/> table must point to a valid SSL
-              configuration when this form is used.
-            </p>
+            <p>Listens for SSL connections on the specified TCP
+            <var>port</var>.  If <var>ip</var>, which must be expressed
+            as an IP address (not a DNS name), is specified, then
+            connections are restricted to the specified local IP
+            address.  The <ref table="Open_vSwitch" column="ssl"/>
+            column in the <ref table="Open_vSwitch"/> table must point
+            to a valid SSL configuration when this form is used.</p>
+            <p>If <var>port</var> is not specified, it currently
+            defaults to 6633.  In the future, the default will change to
+            6653, which is the IANA-defined value.</p>
             <p>SSL support is an optional feature that is not always built as
             part of Open vSwitch.</p>
           </dd>
           <dt><code>ptcp:</code>[<var>port</var>][<code>:<var>ip</var></code>]</dt>
           <dd>
-            Listens for connections on the specified TCP <var>port</var>
-            (default: 6633).  If <var>ip</var>, which must be expressed as an
-            IP address (not a DNS name), is specified, then connections are
-            restricted to the specified local IP address.
+            <p>Listens for connections on the specified TCP
+            <var>port</var>.  If <var>ip</var>, which must be expressed
+            as an IP address (not a DNS name), is specified, then
+            connections are restricted to the specified local IP
+            address.</p>
+            <p>If <var>port</var> is not specified, it currently
+            defaults to 6633.  In the future, the default will change to
+            6653, which is the IANA-defined value.</p>
           </dd>
         </dl>
         <p>When multiple controllers are configured for a single bridge, the
           <dt><code>ssl:<var>ip</var></code>[<code>:<var>port</var></code>]</dt>
           <dd>
             <p>
-              The specified SSL <var>port</var> (default: 6632) on the host at
-              the given <var>ip</var>, which must be expressed as an IP address
-              (not a DNS name).  The <ref table="Open_vSwitch" column="ssl"/>
-              column in the <ref table="Open_vSwitch"/> table must point to a
-              valid SSL configuration when this form is used.
+              The specified SSL <var>port</var> on the host at the given
+              <var>ip</var>, which must be expressed as an IP address
+              (not a DNS name).  The <ref table="Open_vSwitch"
+              column="ssl"/> column in the <ref table="Open_vSwitch"/>
+              table must point to a valid SSL configuration when this
+              form is used.
             </p>
             <p>
-              SSL support is an optional feature that is not always built as
-              part of Open vSwitch.
+              If <var>port</var> is not specified, it currently defaults
+              to 6632.  In the future, the default will change to 6640,
+              which is the IANA-defined value.
+            </p>
+            <p>
+              SSL support is an optional feature that is not always
+              built as part of Open vSwitch.
             </p>
           </dd>
 
           <dt><code>tcp:<var>ip</var></code>[<code>:<var>port</var></code>]</dt>
           <dd>
-            The specified TCP <var>port</var> (default: 6632) on the host at
-            the given <var>ip</var>, which must be expressed as an IP address
-            (not a DNS name).
+            <p>
+              The specified TCP <var>port</var> on the host at the given
+              <var>ip</var>, which must be expressed as an IP address
+              (not a DNS name).
+            </p>
+            <p>
+              If <var>port</var> is not specified, it currently defaults
+              to 6632.  In the future, the default will change to 6640,
+              which is the IANA-defined value.
+            </p>
           </dd>
           <dt><code>pssl:</code>[<var>port</var>][<code>:<var>ip</var></code>]</dt>
           <dd>
             <p>
-              Listens for SSL connections on the specified TCP <var>port</var>
-              (default: 6632).  Specify 0 for <var>port</var> to have the
-              kernel automatically choose an available port.  If <var>ip</var>,
-              which must be expressed as an IP address (not a DNS name), is
-              specified, then connections are restricted to the specified local
-              IP address.
-            </p>
-            <p>
-              The <ref table="Open_vSwitch" column="ssl"/> column in the <ref
+              Listens for SSL connections on the specified TCP
+              <var>port</var>.  Specify 0 for <var>port</var> to have
+              the kernel automatically choose an available port.  If
+              <var>ip</var>, which must be expressed as an IP address
+              (not a DNS name), is specified, then connections are
+              restricted to the specified local IP address.  The <ref
+              table="Open_vSwitch" column="ssl"/> column in the <ref
               table="Open_vSwitch"/> table must point to a valid SSL
               configuration when this form is used.
             </p>
+            <p>
+              If <var>port</var> is not specified, it currently defaults
+              to 6632.  In the future, the default will change to 6640,
+              which is the IANA-defined value.
+            </p>
             <p>
               SSL support is an optional feature that is not always built as
               part of Open vSwitch.
           </dd>
           <dt><code>ptcp:</code>[<var>port</var>][<code>:<var>ip</var></code>]</dt>
           <dd>
-            Listens for connections on the specified TCP <var>port</var>
-            (default: 6632).  Specify 0 for <var>port</var> to have the kernel
-            automatically choose an available port.  If <var>ip</var>, which
-            must be expressed as an IP address (not a DNS name), is specified,
-            then connections are restricted to the specified local IP address.
+            <p>
+              Listens for connections on the specified TCP
+              <var>port</var>.  Specify 0 for <var>port</var> to have
+              the kernel automatically choose an available port.  If
+              <var>ip</var>, which must be expressed as an IP address
+              (not a DNS name), is specified, then connections are
+              restricted to the specified local IP address.
+            </p>
+            <p>
+              If <var>port</var> is not specified, it currently defaults
+              to 6632.  In the future, the default will change to 6640,
+              which is the IANA-defined value.
+            </p>
           </dd>
         </dl>
         <p>When multiple managers are configured, the <ref column="target"/>