#include <errno.h>
+#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-governor.h"
+#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-sflow.h"
#include "poll-loop.h"
#include "simap.h"
* this flow when actions change header fields. */
struct flow flow;
+ /* Flow at the last commit. */
+ struct flow base_flow;
+
+ /* Tunnel IP destination address as received. This is stored separately
+ * as the base_flow.tunnel is cleared on init to reflect the datapath
+ * behavior. Used to make sure not to send tunneled output to ourselves,
+ * which might lead to an infinite loop. This could happen easily
+ * if a tunnel is marked as 'remote_ip=flow', and the flow does not
+ * actually set the tun_dst field. */
+ ovs_be32 orig_tunnel_ip_dst;
+
/* stack for the push and pop actions.
* Each stack element is of the type "union mf_subvalue". */
struct ofpbuf stack;
int recurse; /* Recursion level, via xlate_table_action. */
bool max_resubmit_trigger; /* Recursed too deeply during translation. */
- struct flow base_flow; /* Flow at the last commit. */
uint32_t orig_skb_priority; /* Priority when packet arrived. */
uint8_t table_id; /* OpenFlow table ID where flow was found. */
uint32_t sflow_n_outputs; /* Number of output ports. */
* This member should be removed when the VLAN splinters feature is no
* longer needed. */
ovs_be16 vlan_tci;
-
- /* If received on a tunnel, the IP TOS value of the tunnel. */
- uint8_t tunnel_ip_tos;
};
static void action_xlate_ctx_init(struct action_xlate_ctx *,
static size_t put_userspace_action(const struct ofproto_dpif *,
struct ofpbuf *odp_actions,
const struct flow *,
- const union user_action_cookie *);
+ const union user_action_cookie *,
+ const size_t);
static void compose_slow_path(const struct ofproto_dpif *, const struct flow *,
enum slow_path_reason,
int key_len;
long long int used; /* Time last used; time created if not used. */
+ long long int created; /* Time created. */
uint64_t dp_packet_count; /* Last known packet count in the datapath. */
uint64_t dp_byte_count; /* Last known byte count in the datapath. */
* always be valid, since it could have been removed after newer
* subfacets were pushed onto the 'subfacets' list.) */
struct subfacet one_subfacet;
+
+ long long int learn_rl; /* Rate limiter for facet_learn(). */
};
static struct facet *facet_create(struct rule_dpif *,
static void facet_push_stats(struct facet *);
static void facet_learn(struct facet *);
static void facet_account(struct facet *);
+static void push_all_stats(void);
static struct subfacet *facet_get_subfacet(struct facet *);
struct ofbundle *bundle; /* Bundle that contains this port, if any. */
struct list bundle_node; /* In struct ofbundle's "ports" list. */
struct cfm *cfm; /* Connectivity Fault Management, if any. */
+ struct bfd *bfd; /* BFD, if any. */
tag_type tag; /* Tag associated with this port. */
bool may_enable; /* May be enabled in bonds. */
long long int carrier_seq; /* Carrier status changes. */
static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
- ovs_assert(ofport->ofproto->ofproto_class == &ofproto_dpif_class);
return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}
static void port_wait(struct ofport_dpif *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_clear_priorities(struct ofport_dpif *);
+static void run_fast_rl(void);
struct dpif_completion {
struct list list_node;
static void dpif_stats_update_hit_count(struct ofproto_dpif *ofproto,
uint64_t delta);
+struct avg_subfacet_rates {
+ double add_rate; /* Moving average of new flows created per minute. */
+ double del_rate; /* Moving average of flows deleted per minute. */
+};
+static void show_dp_rates(struct ds *ds, const char *heading,
+ const struct avg_subfacet_rates *rates);
+static void exp_mavg(double *avg, int base, double new);
struct ofproto_dpif {
struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
/* Bridging. */
struct netflow *netflow;
struct dpif_sflow *sflow;
+ struct dpif_ipfix *ipfix;
struct hmap bundles; /* Contains "struct ofbundle"s. */
struct mac_learning *ml;
struct ofmirror *mirrors[MAX_MIRRORS];
struct hmap facets;
struct hmap subfacets;
struct governor *governor;
+ long long int consistency_rl;
/* Revalidation. */
struct table_dpif tables[N_TABLES];
/* Per ofproto's dpif stats. */
uint64_t n_hit;
uint64_t n_missed;
+
+ /* Subfacet statistics.
+ *
+ * These keep track of the total number of subfacets added and deleted and
+ * flow life span. They are useful for computing the flow rates stats
+ * exposed via "ovs-appctl dpif/show". The goal is to learn about
+ * traffic patterns in ways that we can use later to improve Open vSwitch
+ * performance in new situations. */
+ long long int created; /* Time when it is created. */
+ unsigned int max_n_subfacet; /* Maximum number of flows. */
+
+ /* The average number of subfacets... */
+ struct avg_subfacet_rates hourly; /* ...over the last hour. */
+ struct avg_subfacet_rates daily; /* ...over the last day. */
+ long long int last_minute; /* Last time 'hourly' was updated. */
+
+ /* Number of subfacets added or deleted since 'last_minute'. */
+ unsigned int subfacet_add_count;
+ unsigned int subfacet_del_count;
+
+ /* Number of subfacets added or deleted from 'created' to 'last_minute.' */
+ unsigned long long int total_subfacet_add_count;
+ unsigned long long int total_subfacet_del_count;
+
+ /* Sum of the number of milliseconds that each subfacet existed,
+ * over the subfacets that have been added and then later deleted. */
+ unsigned long long int total_subfacet_life_span;
+
+ /* Incremented by the number of currently existing subfacets, each
+ * time we pull statistics from the kernel. */
+ unsigned long long int total_subfacet_count;
+
+ /* Number of times we pull statistics from the kernel. */
+ unsigned long long int n_update_stats;
};
+static unsigned long long int avg_subfacet_life_span(
+ const struct ofproto_dpif *);
+static double avg_subfacet_count(const struct ofproto_dpif *ofproto);
+static void update_moving_averages(struct ofproto_dpif *ofproto);
+static void dpif_stats_update_hit_count(struct ofproto_dpif *ofproto,
+ uint64_t delta);
+static void update_max_subfacet_count(struct ofproto_dpif *ofproto);
/* Defer flow mod completion until "ovs-appctl ofproto/unclog"? (Useful only
* for debugging the asynchronous flow_mod implementation.) */
static size_t compose_sflow_action(const struct ofproto_dpif *,
struct ofpbuf *odp_actions,
const struct flow *, uint32_t odp_port);
+static void compose_ipfix_action(const struct ofproto_dpif *,
+ struct ofpbuf *odp_actions,
+ const struct flow *);
static void add_mirror_actions(struct action_xlate_ctx *ctx,
const struct flow *flow);
/* Global variables. */
static int
type_run(const char *type)
{
+ static long long int push_timer = LLONG_MIN;
struct dpif_backer *backer;
char *devname;
int error;
dpif_run(backer->dpif);
+ /* The most natural place to push facet statistics is when they're pulled
+ * from the datapath. However, when there are many flows in the datapath,
+ * this expensive operation can occur so frequently that it reduces our
+ * ability to quickly set up flows. To reduce the cost, we push statistics
+ * here instead. */
+ if (time_msec() > push_timer) {
+ push_timer = time_msec() + 2000;
+ push_all_stats();
+ }
+
if (backer->need_revalidate
|| !tag_set_is_empty(&backer->revalidate_set)) {
struct tag_set revalidate_set = backer->revalidate_set;
if (need_revalidate
|| tag_set_intersects(&revalidate_set, facet->tags)) {
facet_revalidate(facet);
+ run_fast_rl();
}
}
}
}
static int
-type_run_fast(const char *type)
+dpif_backer_run_fast(struct dpif_backer *backer, int max_batch)
{
- struct dpif_backer *backer;
unsigned int work;
- backer = shash_find_data(&all_dpif_backers, type);
- if (!backer) {
- /* This is not necessarily a problem, since backers are only
- * created on demand. */
- return 0;
- }
-
/* Handle one or more batches of upcalls, until there's nothing left to do
* or until we do a fixed total amount of work.
*
* optimizations can make major improvements on some benchmarks and
* presumably for real traffic as well. */
work = 0;
- while (work < FLOW_MISS_MAX_BATCH) {
- int retval = handle_upcalls(backer, FLOW_MISS_MAX_BATCH - work);
+ while (work < max_batch) {
+ int retval = handle_upcalls(backer, max_batch - work);
if (retval <= 0) {
return -retval;
}
return 0;
}
+static int
+type_run_fast(const char *type)
+{
+ struct dpif_backer *backer;
+
+ backer = shash_find_data(&all_dpif_backers, type);
+ if (!backer) {
+ /* This is not necessarily a problem, since backers are only
+ * created on demand. */
+ return 0;
+ }
+
+ return dpif_backer_run_fast(backer, FLOW_MISS_MAX_BATCH);
+}
+
+static void
+run_fast_rl(void)
+{
+ static long long int port_rl = LLONG_MIN;
+ static unsigned int backer_rl = 0;
+
+ if (time_msec() >= port_rl) {
+ struct ofproto_dpif *ofproto;
+ struct ofport_dpif *ofport;
+
+ HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+
+ HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
+ port_run_fast(ofport);
+ }
+ }
+ port_rl = time_msec() + 200;
+ }
+
+ /* XXX: We have to be careful not to do too much work in this function. If
+ * we call dpif_backer_run_fast() too often, or with too large a batch,
+ * performance improves significantly, but at a cost. It's possible for the
+ * number of flows in the datapath to increase without bound, and for poll
+ * loops to take 10s of seconds. The correct solution to this problem,
+ * long term, is to separate flow miss handling into its own thread so it
+ * isn't affected by revalidations and expirations. Until then, this is
+ * the best we can do. */
+ if (++backer_rl >= 10) {
+ struct shash_node *node;
+
+ backer_rl = 0;
+ SHASH_FOR_EACH (node, &all_dpif_backers) {
+ dpif_backer_run_fast(node->data, 1);
+ }
+ }
+}
+
static void
type_wait(const char *type)
{
ofproto->netflow = NULL;
ofproto->sflow = NULL;
+ ofproto->ipfix = NULL;
ofproto->stp = NULL;
hmap_init(&ofproto->bundles);
ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
hmap_init(&ofproto->facets);
hmap_init(&ofproto->subfacets);
ofproto->governor = NULL;
+ ofproto->consistency_rl = LLONG_MIN;
for (i = 0; i < N_TABLES; i++) {
struct table_dpif *table = &ofproto->tables[i];
ofproto->n_hit = 0;
ofproto->n_missed = 0;
+ ofproto->max_n_subfacet = 0;
+ ofproto->created = time_msec();
+ ofproto->last_minute = ofproto->created;
+ memset(&ofproto->hourly, 0, sizeof ofproto->hourly);
+ memset(&ofproto->daily, 0, sizeof ofproto->daily);
+ ofproto->subfacet_add_count = 0;
+ ofproto->subfacet_del_count = 0;
+ ofproto->total_subfacet_add_count = 0;
+ ofproto->total_subfacet_del_count = 0;
+ ofproto->total_subfacet_life_span = 0;
+ ofproto->total_subfacet_count = 0;
+ ofproto->n_update_stats = 0;
+
return error;
}
mac_learning_run(ofproto->ml, &ofproto->backer->revalidate_set);
/* Check the consistency of a random facet, to aid debugging. */
- if (!hmap_is_empty(&ofproto->facets)
+ if (time_msec() >= ofproto->consistency_rl
+ && !hmap_is_empty(&ofproto->facets)
&& !ofproto->backer->need_revalidate) {
struct facet *facet;
+ ofproto->consistency_rl = time_msec() + 250;
+
facet = CONTAINER_OF(hmap_random_node(&ofproto->facets),
struct facet, hmap_node);
if (!tag_set_intersects(&ofproto->backer->revalidate_set,
ofproto->backer->need_revalidate = REV_RECONFIGURE;
port->bundle = NULL;
port->cfm = NULL;
+ port->bfd = NULL;
port->tag = tag_create_random();
port->may_enable = true;
port->stp_port = NULL;
port->carrier_seq = netdev_get_carrier_resets(netdev);
if (netdev_vport_is_patch(netdev)) {
- /* XXX By bailing out here, we don't do required sFlow work. */
+ /* By bailing out here, we don't submit the port to the sFlow module
+ * to be considered for counter polling export. This is correct
+ * because the patch port represents an interface that sFlow considers
+ * to be "internal" to the switch as a whole, and therefore not a
+ * candidate for counter polling. */
port->odp_port = OVSP_NONE;
return 0;
}
return 0;
}
+static int
+set_ipfix(
+ struct ofproto *ofproto_,
+ const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
+ const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
+ size_t n_flow_exporters_options)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+ struct dpif_ipfix *di = ofproto->ipfix;
+
+ if (bridge_exporter_options || flow_exporters_options) {
+ if (!di) {
+ di = ofproto->ipfix = dpif_ipfix_create();
+ }
+ dpif_ipfix_set_options(
+ di, bridge_exporter_options, flow_exporters_options,
+ n_flow_exporters_options);
+ } else {
+ if (di) {
+ dpif_ipfix_destroy(di);
+ ofproto->ipfix = NULL;
+ }
+ }
+ return 0;
+}
+
static int
set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
{
return false;
}
}
+
+static int
+set_bfd(struct ofport *ofport_, const struct smap *cfg)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ struct bfd *old;
+
+ old = ofport->bfd;
+ ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), cfg);
+ if (ofport->bfd != old) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+
+ return 0;
+}
+
+static int
+get_bfd_status(struct ofport *ofport_, struct smap *smap)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+ if (ofport->bfd) {
+ bfd_get_status(ofport->bfd, smap);
+ return 0;
+ } else {
+ return ENOENT;
+ }
+}
\f
/* Spanning Tree. */
return 0;
}
+ push_all_stats();
+
*packets = mirror->packet_count;
*bytes = mirror->byte_count;
send_packet(ofport, &packet);
ofpbuf_uninit(&packet);
}
+
+ if (ofport->bfd && bfd_should_send_packet(ofport->bfd)) {
+ struct ofpbuf packet;
+
+ ofpbuf_init(&packet, 0);
+ bfd_put_packet(ofport->bfd, &packet, ofport->up.pp.hw_addr);
+ send_packet(ofport, &packet);
+ ofpbuf_uninit(&packet);
+ }
}
static void
}
}
+ if (ofport->bfd) {
+ bfd_run(ofport->bfd);
+ enable = enable && bfd_forwarding(ofport->bfd);
+ }
+
if (ofport->bundle) {
enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
if (carrier_changed) {
if (ofport->cfm) {
cfm_wait(ofport->cfm);
}
+
+ if (ofport->bfd) {
+ bfd_wait(ofport->bfd);
+ }
}
static int
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
int error;
+ push_all_stats();
+
error = netdev_get_stats(ofport->up.netdev, stats);
if (!error && ofport_->ofp_port == OFPP_LOCAL) {
return error;
}
-/* Account packets for LOCAL port. */
-static void
-ofproto_update_local_port_stats(const struct ofproto *ofproto_,
- size_t tx_size, size_t rx_size)
-{
- struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-
- if (rx_size) {
- ofproto->stats.rx_packets++;
- ofproto->stats.rx_bytes += rx_size;
- }
- if (tx_size) {
- ofproto->stats.tx_packets++;
- ofproto->stats.tx_bytes += tx_size;
- }
-}
-
struct port_dump_state {
uint32_t bucket;
uint32_t offset;
cfm_process_heartbeat(ofport->cfm, packet);
}
return SLOW_CFM;
+ } else if (ofport->bfd && bfd_should_process_flow(flow)) {
+ if (packet) {
+ bfd_process_packet(ofport->bfd, flow, packet);
+ }
+ return SLOW_BFD;
} else if (ofport->bundle && ofport->bundle->lacp
&& flow->dl_type == htons(ETH_TYPE_LACP)) {
if (packet) {
* to the VLAN TCI with which the packet was really received, that is, the
* actual VLAN TCI extracted by odp_flow_key_to_flow(). (This differs from
* the value returned in flow->vlan_tci only for packets received on
- * VLAN splinters.) Also, if received on an IP tunnel, sets
- * 'initial_vals->tunnel_ip_tos' to the tunnel's IP TOS.
+ * VLAN splinters.)
*
* Similarly, this function also includes some logic to help with tunnels. It
* may modify 'flow' as necessary to make the tunneling implementation
if (initial_vals) {
initial_vals->vlan_tci = flow->vlan_tci;
- initial_vals->tunnel_ip_tos = flow->tunnel.ip_tos;
}
if (odp_in_port) {
*odp_in_port = flow->in_port;
}
- if (tnl_port_should_receive(flow)) {
- const struct ofport *ofport = tnl_port_receive(flow);
- if (!ofport) {
- flow->in_port = OFPP_NONE;
- goto exit;
- }
- port = ofport_dpif_cast(ofport);
+ port = (tnl_port_should_receive(flow)
+ ? ofport_dpif_cast(tnl_port_receive(flow))
+ : odp_port_to_ofport(backer, flow->in_port));
+ flow->in_port = port ? port->up.ofp_port : OFPP_NONE;
+ if (!port) {
+ goto exit;
+ }
- /* We can't reproduce 'key' from 'flow'. */
- fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
+ /* XXX: Since the tunnel module is not scoped per backer, for a tunnel port
+ * it's theoretically possible that we'll receive an ofport belonging to an
+ * entirely different datapath. In practice, this can't happen because no
+ * platform has two separate datapaths which each support tunneling. */
+ ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);
- /* XXX: Since the tunnel module is not scoped per backer, it's
- * theoretically possible that we'll receive an ofport belonging to an
- * entirely different datapath. In practice, this can't happen because
- * no platforms has two separate datapaths which each support
- * tunneling. */
- ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);
- } else {
- port = odp_port_to_ofport(backer, flow->in_port);
- if (!port) {
- flow->in_port = OFPP_NONE;
- goto exit;
- }
-
- flow->in_port = port->up.ofp_port;
- if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
- if (packet) {
- /* Make the packet resemble the flow, so that it gets sent to
- * an OpenFlow controller properly, so that it looks correct
- * for sFlow, and so that flow_extract() will get the correct
- * vlan_tci if it is called on 'packet'.
- *
- * The allocated space inside 'packet' probably also contains
- * 'key', that is, both 'packet' and 'key' are probably part of
- * a struct dpif_upcall (see the large comment on that
- * structure definition), so pushing data on 'packet' is in
- * general not a good idea since it could overwrite 'key' or
- * free it as a side effect. However, it's OK in this special
- * case because we know that 'packet' is inside a Netlink
- * attribute: pushing 4 bytes will just overwrite the 4-byte
- * "struct nlattr", which is fine since we don't need that
- * header anymore. */
- eth_push_vlan(packet, flow->vlan_tci);
- }
- /* We can't reproduce 'key' from 'flow'. */
- fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
+ if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
+ if (packet) {
+ /* Make the packet resemble the flow, so that it gets sent to
+ * an OpenFlow controller properly, so that it looks correct
+ * for sFlow, and so that flow_extract() will get the correct
+ * vlan_tci if it is called on 'packet'.
+ *
+ * The allocated space inside 'packet' probably also contains
+ * 'key', that is, both 'packet' and 'key' are probably part of
+ * a struct dpif_upcall (see the large comment on that
+ * structure definition), so pushing data on 'packet' is in
+ * general not a good idea since it could overwrite 'key' or
+ * free it as a side effect. However, it's OK in this special
+ * case because we know that 'packet' is inside a Netlink
+ * attribute: pushing 4 bytes will just overwrite the 4-byte
+ * "struct nlattr", which is fine since we don't need that
+ * header anymore. */
+ eth_push_vlan(packet, flow->vlan_tci);
}
+ /* We can't reproduce 'key' from 'flow'. */
+ fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
}
error = 0;
hmap_destroy(&todo);
}
-static enum { SFLOW_UPCALL, MISS_UPCALL, BAD_UPCALL }
+static enum { SFLOW_UPCALL, MISS_UPCALL, BAD_UPCALL, FLOW_SAMPLE_UPCALL,
+ IPFIX_UPCALL }
classify_upcall(const struct dpif_upcall *upcall)
{
+ size_t userdata_len;
union user_action_cookie cookie;
/* First look at the upcall type. */
VLOG_WARN_RL(&rl, "action upcall missing cookie");
return BAD_UPCALL;
}
- if (nl_attr_get_size(upcall->userdata) != sizeof(cookie)) {
+ userdata_len = nl_attr_get_size(upcall->userdata);
+ if (userdata_len < sizeof cookie.type
+ || userdata_len > sizeof cookie) {
VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %zu",
- nl_attr_get_size(upcall->userdata));
+ userdata_len);
return BAD_UPCALL;
}
- memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof(cookie));
- switch (cookie.type) {
- case USER_ACTION_COOKIE_SFLOW:
+ memset(&cookie, 0, sizeof cookie);
+ memcpy(&cookie, nl_attr_get(upcall->userdata), userdata_len);
+ if (userdata_len == sizeof cookie.sflow
+ && cookie.type == USER_ACTION_COOKIE_SFLOW) {
return SFLOW_UPCALL;
-
- case USER_ACTION_COOKIE_SLOW_PATH:
+ } else if (userdata_len == sizeof cookie.slow_path
+ && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
return MISS_UPCALL;
-
- case USER_ACTION_COOKIE_UNSPEC:
- default:
- VLOG_WARN_RL(&rl, "invalid user cookie : 0x%"PRIx64,
- nl_attr_get_u64(upcall->userdata));
+ } else if (userdata_len == sizeof cookie.flow_sample
+ && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
+ return FLOW_SAMPLE_UPCALL;
+ } else if (userdata_len == sizeof cookie.ipfix
+ && cookie.type == USER_ACTION_COOKIE_IPFIX) {
+ return IPFIX_UPCALL;
+ } else {
+ VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
+ " and size %zu", cookie.type, userdata_len);
return BAD_UPCALL;
}
}
return;
}
- memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof(cookie));
+ memset(&cookie, 0, sizeof cookie);
+ memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow);
dpif_sflow_received(ofproto->sflow, upcall->packet, &flow,
odp_in_port, &cookie);
}
+static void
+handle_flow_sample_upcall(struct dpif_backer *backer,
+ const struct dpif_upcall *upcall)
+{
+ struct ofproto_dpif *ofproto;
+ union user_action_cookie cookie;
+ struct flow flow;
+
+ if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
+ &flow, NULL, &ofproto, NULL, NULL)
+ || !ofproto->ipfix) {
+ return;
+ }
+
+ memset(&cookie, 0, sizeof cookie);
+ memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample);
+
+ /* The flow reflects exactly the contents of the packet. Sample
+ * the packet using it. */
+ dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow,
+ cookie.flow_sample.collector_set_id,
+ cookie.flow_sample.probability,
+ cookie.flow_sample.obs_domain_id,
+ cookie.flow_sample.obs_point_id);
+}
+
+static void
+handle_ipfix_upcall(struct dpif_backer *backer,
+ const struct dpif_upcall *upcall)
+{
+ struct ofproto_dpif *ofproto;
+ struct flow flow;
+
+ if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
+ &flow, NULL, &ofproto, NULL, NULL)
+ || !ofproto->ipfix) {
+ return;
+ }
+
+ /* The flow reflects exactly the contents of the packet. Sample
+ * the packet using it. */
+ dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow);
+}
+
static int
handle_upcalls(struct dpif_backer *backer, unsigned int max_batch)
{
ofpbuf_uninit(buf);
break;
+ case FLOW_SAMPLE_UPCALL:
+ handle_flow_sample_upcall(backer, upcall);
+ ofpbuf_uninit(buf);
+ break;
+
+ case IPFIX_UPCALL:
+ handle_ipfix_upcall(backer, upcall);
+ ofpbuf_uninit(buf);
+ break;
+
case BAD_UPCALL:
ofpbuf_uninit(buf);
break;
continue;
}
+ /* Keep track of the max number of flows per ofproto_dpif. */
+ update_max_subfacet_count(ofproto);
+
/* Expire subfacets that have been idle too long. */
dp_max_idle = subfacet_max_idle(ofproto);
expire_subfacets(ofproto, dp_max_idle);
facet_account(facet);
facet->accounted_bytes = facet->byte_count;
}
- facet_push_stats(facet);
}
/* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing
const struct dpif_flow_stats *stats;
struct dpif_flow_dump dump;
const struct nlattr *key;
+ struct ofproto_dpif *ofproto;
size_t key_len;
dpif_flow_dump_start(&dump, backer->dpif);
while (dpif_flow_dump_next(&dump, &key, &key_len, NULL, NULL, &stats)) {
struct flow flow;
struct subfacet *subfacet;
- struct ofproto_dpif *ofproto;
struct ofport_dpif *ofport;
uint32_t key_hash;
continue;
}
+ ofproto->total_subfacet_count += hmap_count(&ofproto->subfacets);
+ ofproto->n_update_stats++;
+
ofport = get_ofp_port(ofproto, flow.in_port);
if (ofport && ofport->tnl_port) {
netdev_vport_inc_rx(ofport->up.netdev, stats);
delete_unexpected_flow(ofproto, key, key_len);
break;
}
+ run_fast_rl();
}
dpif_flow_dump_done(&dump);
+
+ HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+ update_moving_averages(ofproto);
+ }
+
}
/* Calculates and returns the number of milliseconds of idle time after which
&ofproto->subfacets) {
long long int cutoff;
- cutoff = (subfacet->slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)
+ cutoff = (subfacet->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
? special_cutoff
: normal_cutoff);
if (subfacet->used < cutoff) {
netflow_flow_init(&facet->nf_flow);
netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
+ facet->learn_rl = time_msec() + 500;
+
return facet;
}
struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
struct subfacet *subfacet= CONTAINER_OF(list_front(&facet->subfacets),
struct subfacet, list_node);
+ long long int now = time_msec();
struct action_xlate_ctx ctx;
+ if (!facet->has_fin_timeout && now < facet->learn_rl) {
+ return;
+ }
+
+ facet->learn_rl = now + 500;
+
if (!facet->has_learn
&& !facet->has_normal
&& (!facet->has_fin_timeout
netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
}
- facet->rule->packet_count += facet->packet_count;
- facet->rule->byte_count += facet->byte_count;
-
/* Reset counters to prevent double counting if 'facet' ever gets
* reinstalled. */
facet_reset_counters(facet);
facet->prev_byte_count = facet->byte_count;
facet->prev_used = facet->used;
+ rule_credit_stats(facet->rule, &stats);
flow_push_stats(facet, &stats);
update_mirror_stats(ofproto_dpif_cast(facet->rule->up.ofproto),
}
}
+static void
+push_all_stats__(bool run_fast)
+{
+ static long long int rl = LLONG_MIN;
+ struct ofproto_dpif *ofproto;
+
+ if (time_msec() < rl) {
+ return;
+ }
+
+ HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+ struct facet *facet;
+
+ HMAP_FOR_EACH (facet, hmap_node, &ofproto->facets) {
+ facet_push_stats(facet);
+ if (run_fast) {
+ run_fast_rl();
+ }
+ }
+ }
+
+ rl = time_msec() + 100;
+}
+
+static void
+push_all_stats(void)
+{
+ push_all_stats__(true);
+}
+
static void
rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
{
subfacet->key = xmemdup(key, key_len);
subfacet->key_len = key_len;
subfacet->used = now;
+ subfacet->created = now;
subfacet->dp_packet_count = 0;
subfacet->dp_byte_count = 0;
subfacet->actions_len = 0;
subfacet->initial_vals = miss->initial_vals;
subfacet->odp_in_port = miss->odp_in_port;
+ ofproto->subfacet_add_count++;
return subfacet;
}
struct facet *facet = subfacet->facet;
struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
+ /* Update ofproto stats before uninstalling the subfacet. */
+ ofproto->subfacet_del_count++;
+ ofproto->total_subfacet_life_span += (time_msec() - subfacet->created);
+
subfacet_uninstall(subfacet);
hmap_remove(&ofproto->subfacets, &subfacet->hmap_node);
list_remove(&subfacet->list_node);
subfacet_reset_dp_stats(subfacets[i], &stats[i]);
subfacets[i]->path = SF_NOT_INSTALLED;
subfacet_destroy(subfacets[i]);
+ run_fast_rl();
}
}
rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
- struct facet *facet;
+
+ /* push_all_stats() can handle flow misses which, when using the learn
+ * action, can cause rules to be added and deleted. This can corrupt our
+ * caller's datastructures which assume that rule_get_stats() doesn't have
+ * an impact on the flow table. To be safe, we disable miss handling. */
+ push_all_stats__(false);
/* Start from historical data for 'rule' itself that are no longer tracked
* in facets. This counts, for example, facets that have expired. */
*packets = rule->packet_count;
*bytes = rule->byte_count;
-
- /* Add any statistics that are tracked by facets. This includes
- * statistical data recently updated by ofproto_update_stats() as well as
- * stats for packets that were executed "by hand" via dpif_execute(). */
- LIST_FOR_EACH (facet, list_node, &rule->facets) {
- *packets += facet->packet_count;
- *bytes += facet->byte_count;
- }
}
static void
rule_credit_stats(rule, &stats);
initial_vals.vlan_tci = flow->vlan_tci;
- initial_vals.tunnel_ip_tos = flow->tunnel.ip_tos;
ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals,
rule, stats.tcp_flags, packet);
static int
send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
{
- const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
uint64_t odp_actions_stub[1024 / 8];
struct ofpbuf key, odp_actions;
+ struct dpif_flow_stats stats;
struct odputil_keybuf keybuf;
- uint32_t odp_port;
+ struct ofpact_output output;
+ struct action_xlate_ctx ctx;
struct flow flow;
int error;
- flow_extract(packet, 0, 0, NULL, OFPP_LOCAL, &flow);
- if (netdev_vport_is_patch(ofport->up.netdev)) {
- struct ofproto_dpif *peer_ofproto;
- struct dpif_flow_stats stats;
- struct ofport_dpif *peer;
- struct rule_dpif *rule;
-
- peer = ofport_get_peer(ofport);
- if (!peer) {
- return ENODEV;
- }
-
- dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- netdev_vport_inc_tx(ofport->up.netdev, &stats);
- netdev_vport_inc_rx(peer->up.netdev, &stats);
-
- flow.in_port = peer->up.ofp_port;
- peer_ofproto = ofproto_dpif_cast(peer->up.ofproto);
- rule = rule_dpif_lookup(peer_ofproto, &flow);
- rule_dpif_execute(rule, &flow, packet);
-
- return 0;
- }
-
ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
+ ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- if (ofport->tnl_port) {
- struct dpif_flow_stats stats;
-
- odp_port = tnl_port_send(ofport->tnl_port, &flow);
- if (odp_port == OVSP_NONE) {
- return ENODEV;
- }
-
- dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- netdev_vport_inc_tx(ofport->up.netdev, &stats);
- odp_put_tunnel_action(&flow.tunnel, &odp_actions);
- odp_put_skb_mark_action(flow.skb_mark, &odp_actions);
- } else {
- odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port,
- flow.vlan_tci);
- if (odp_port != ofport->odp_port) {
- eth_pop_vlan(packet);
- flow.vlan_tci = htons(0);
- }
- }
+ /* Use OFPP_NONE as the in_port to avoid special packet processing. */
+ flow_extract(packet, 0, 0, NULL, OFPP_NONE, &flow);
+ odp_flow_key_from_flow(&key, &flow, ofp_port_to_odp_port(ofproto,
+ OFPP_LOCAL));
+ dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- odp_flow_key_from_flow(&key, &flow,
- ofp_port_to_odp_port(ofproto, flow.in_port));
+ ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
+ output.port = ofport->up.ofp_port;
+ output.max_len = 0;
- compose_sflow_action(ofproto, &odp_actions, &flow, odp_port);
+ action_xlate_ctx_init(&ctx, ofproto, &flow, NULL, NULL, 0, packet);
+ ctx.resubmit_stats = &stats;
+ xlate_actions(&ctx, &output.ofpact, sizeof output, &odp_actions);
- nl_msg_put_u32(&odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port);
error = dpif_execute(ofproto->backer->dpif,
key.data, key.size,
odp_actions.data, odp_actions.size,
ofpbuf_uninit(&odp_actions);
if (error) {
- VLOG_WARN_RL(&rl, "%s: failed to send packet on port %"PRIu32" (%s)",
- ofproto->up.name, odp_port, strerror(error));
+ VLOG_WARN_RL(&rl, "%s: failed to send packet on port %s (%s)",
+ ofproto->up.name, netdev_get_name(ofport->up.netdev),
+ strerror(error));
}
- ofproto_update_local_port_stats(ofport->up.ofproto, packet->size, 0);
+
+ ofproto->stats.tx_packets++;
+ ofproto->stats.tx_bytes += packet->size;
return error;
}
\f
cookie.slow_path.reason = slow;
ofpbuf_use_stack(&buf, stub, stub_size);
- if (slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)) {
+ if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif, UINT32_MAX);
- odp_put_userspace_action(pid, &cookie, sizeof cookie, &buf);
+ odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
} else {
- put_userspace_action(ofproto, &buf, flow, &cookie);
+ put_userspace_action(ofproto, &buf, flow, &cookie,
+ sizeof cookie.slow_path);
}
*actionsp = buf.data;
*actions_lenp = buf.size;
put_userspace_action(const struct ofproto_dpif *ofproto,
struct ofpbuf *odp_actions,
const struct flow *flow,
- const union user_action_cookie *cookie)
+ const union user_action_cookie *cookie,
+ const size_t cookie_size)
{
uint32_t pid;
pid = dpif_port_get_pid(ofproto->backer->dpif,
ofp_port_to_odp_port(ofproto, flow->in_port));
- return odp_put_userspace_action(pid, cookie, sizeof *cookie, odp_actions);
+ return odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
+}
+
+/* Compose SAMPLE action for sFlow or IPFIX. The given probability is
+ * the number of packets out of UINT32_MAX to sample. The given
+ * cookie is passed back in the callback for each sampled packet.
+ *
+ * 'cookie_size' must be the size of the cookie union member actually in
+ * use (e.g. sizeof cookie->sflow), not sizeof *cookie, so that only the
+ * meaningful bytes travel with the nested USERSPACE action.
+ *
+ * Returns the offset within 'odp_actions' at which the cookie was
+ * written, so that the caller can rewrite the cookie in place later if
+ * needed. */
+static size_t
+compose_sample_action(const struct ofproto_dpif *ofproto,
+ struct ofpbuf *odp_actions,
+ const struct flow *flow,
+ const uint32_t probability,
+ const union user_action_cookie *cookie,
+ const size_t cookie_size)
+{
+ size_t sample_offset, actions_offset;
+ int cookie_offset;
+
+ /* A SAMPLE action wraps a nested ACTIONS attribute that the datapath
+ * executes with the given PROBABILITY (out of UINT32_MAX). */
+ sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);
+
+ nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
+
+ actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
+ cookie_offset = put_userspace_action(ofproto, odp_actions, flow, cookie,
+ cookie_size);
+
+ nl_msg_end_nested(odp_actions, actions_offset);
+ nl_msg_end_nested(odp_actions, sample_offset);
+ return cookie_offset;
}
static void
}
}
-/* Compose SAMPLE action for sFlow. */
+/* Compose SAMPLE action for sFlow bridge sampling. */
static size_t
compose_sflow_action(const struct ofproto_dpif *ofproto,
struct ofpbuf *odp_actions,
{
uint32_t probability;
union user_action_cookie cookie;
- size_t sample_offset, actions_offset;
- int cookie_offset;
if (!ofproto->sflow || flow->in_port == OFPP_NONE) {
return 0;
}
- sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);
-
- /* Number of packets out of UINT_MAX to sample. */
probability = dpif_sflow_get_probability(ofproto->sflow);
- nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
-
- actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
compose_sflow_cookie(ofproto, htons(0), odp_port,
odp_port == OVSP_NONE ? 0 : 1, &cookie);
- cookie_offset = put_userspace_action(ofproto, odp_actions, flow, &cookie);
- nl_msg_end_nested(odp_actions, actions_offset);
- nl_msg_end_nested(odp_actions, sample_offset);
- return cookie_offset;
+ return compose_sample_action(ofproto, odp_actions, flow, probability,
+ &cookie, sizeof cookie.sflow);
+}
+
+/* Initializes 'cookie' as a flow-sample cookie (per-flow sampling, as
+ * requested by the OpenFlow "sample" action), copying in the given
+ * sampling parameters so that they are available again when the
+ * sampled packet is handed back to userspace. */
+static void
+compose_flow_sample_cookie(uint16_t probability, uint32_t collector_set_id,
+ uint32_t obs_domain_id, uint32_t obs_point_id,
+ union user_action_cookie *cookie)
+{
+ cookie->type = USER_ACTION_COOKIE_FLOW_SAMPLE;
+ cookie->flow_sample.probability = probability;
+ cookie->flow_sample.collector_set_id = collector_set_id;
+ cookie->flow_sample.obs_domain_id = obs_domain_id;
+ cookie->flow_sample.obs_point_id = obs_point_id;
+}
+
+/* Initializes 'cookie' as an IPFIX bridge-sampling cookie.  No further
+ * fields are filled in: the cookie type alone identifies the sample. */
+static void
+compose_ipfix_cookie(union user_action_cookie *cookie)
+{
+ cookie->type = USER_ACTION_COOKIE_IPFIX;
+}
+
+/* Compose SAMPLE action for IPFIX bridge sampling.  Does nothing if
+ * IPFIX is not configured on 'ofproto' or if 'flow' has no real input
+ * port (in_port == OFPP_NONE, used for internally generated packets). */
+static void
+compose_ipfix_action(const struct ofproto_dpif *ofproto,
+ struct ofpbuf *odp_actions,
+ const struct flow *flow)
+{
+ uint32_t probability;
+ union user_action_cookie cookie;
+
+ if (!ofproto->ipfix || flow->in_port == OFPP_NONE) {
+ return;
+ }
+
+ /* The sampling probability comes from the bridge exporter settings. */
+ probability = dpif_ipfix_get_bridge_exporter_probability(ofproto->ipfix);
+ compose_ipfix_cookie(&cookie);
+
+ /* Only the 'ipfix' member of the cookie union is meaningful here. */
+ compose_sample_action(ofproto, odp_actions, flow, probability,
+ &cookie, sizeof cookie.ipfix);
}
-/* SAMPLE action must be first action in any given list of actions.
- * At this point we do not have all information required to build it. So try to
- * build sample action as complete as possible. */
+/* SAMPLE action for sFlow must be first action in any given list of
+ * actions. At this point we do not have all information required to
+ * build it. So try to build sample action as complete as possible. */
static void
add_sflow_action(struct action_xlate_ctx *ctx)
{
ctx->sflow_n_outputs = 0;
}
+/* The SAMPLE action for IPFIX must be the 1st or 2nd action in any
+ * given list of actions: it comes immediately after the SAMPLE action
+ * for sFlow when sFlow is also enabled, first otherwise. */
+static void
+add_ipfix_action(struct action_xlate_ctx *ctx)
+{
+ compose_ipfix_action(ctx->ofproto, ctx->odp_actions, &ctx->flow);
+}
+
/* Fix SAMPLE action according to data collected while composing ODP actions.
* We need to fix SAMPLE actions OVS_SAMPLE_ATTR_ACTIONS attribute, i.e. nested
* USERSPACE action's user-cookie which is required for sflow. */
}
cookie = ofpbuf_at(ctx->odp_actions, ctx->user_cookie_offset,
- sizeof(*cookie));
+ sizeof cookie->sflow);
ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
compose_sflow_cookie(ctx->ofproto, base->vlan_tci,
bool check_stp)
{
const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
- ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
- ovs_be64 flow_tun_id = ctx->flow.tunnel.tun_id;
- uint8_t flow_nw_tos = ctx->flow.nw_tos;
+ ovs_be16 flow_vlan_tci;
+ uint32_t flow_skb_mark;
+ uint8_t flow_nw_tos;
struct priority_to_dscp *pdscp;
uint32_t out_port, odp_port;
return;
}
+ flow_vlan_tci = ctx->flow.vlan_tci;
+ flow_skb_mark = ctx->flow.skb_mark;
+ flow_nw_tos = ctx->flow.nw_tos;
+
pdscp = get_priority(ofport, ctx->flow.skb_priority);
if (pdscp) {
ctx->flow.nw_tos &= ~IP_DSCP_MASK;
}
if (ofport->tnl_port) {
+ /* Save tunnel metadata so that changes made due to
+ * the Logical (tunnel) Port are not visible for any further
+ * matches, while explicit set actions on tunnel metadata are.
+ */
+ struct flow_tnl flow_tnl = ctx->flow.tunnel;
odp_port = tnl_port_send(ofport->tnl_port, &ctx->flow);
if (odp_port == OVSP_NONE) {
xlate_report(ctx, "Tunneling decided against output");
- return;
+ goto out; /* restore flow_nw_tos */
+ }
+ if (ctx->flow.tunnel.ip_dst == ctx->orig_tunnel_ip_dst) {
+ xlate_report(ctx, "Not tunneling to our own address");
+ goto out; /* restore flow_nw_tos */
}
-
if (ctx->resubmit_stats) {
netdev_vport_inc_tx(ofport->up.netdev, ctx->resubmit_stats);
}
out_port = odp_port;
commit_odp_tunnel_action(&ctx->flow, &ctx->base_flow,
ctx->odp_actions);
+ ctx->flow.tunnel = flow_tnl; /* Restore tunnel metadata */
} else {
odp_port = ofport->odp_port;
out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port,
ctx->sflow_odp_port = odp_port;
ctx->sflow_n_outputs++;
ctx->nf_output_iface = ofp_port;
- ctx->flow.tunnel.tun_id = flow_tun_id;
+
+ /* Restore flow */
ctx->flow.vlan_tci = flow_vlan_tci;
+ ctx->flow.skb_mark = flow_skb_mark;
+ out:
ctx->flow.nw_tos = flow_nw_tos;
}
}
}
-struct xlate_reg_state {
- ovs_be16 vlan_tci;
- ovs_be64 tun_id;
-};
-
static bool
slave_enabled_cb(uint16_t ofp_port, void *ofproto_)
{
}
}
+/* Translates the OpenFlow "sample" action 'os' into a datapath SAMPLE
+ * action appended to ctx->odp_actions. */
+static void
+xlate_sample_action(struct action_xlate_ctx *ctx,
+ const struct ofpact_sample *os)
+{
+ union user_action_cookie cookie;
+ /* Scale the probability from 16-bit to 32-bit while representing
+ * the same percentage: (p << 16) | p maps 0 to 0 and UINT16_MAX to
+ * UINT32_MAX. */
+ uint32_t probability = (os->probability << 16) | os->probability;
+
+ /* Commit any pending header modifications first, so that the sampled
+ * packet reflects the flow as modified by earlier actions. */
+ commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions);
+
+ compose_flow_sample_cookie(os->probability, os->collector_set_id,
+ os->obs_domain_id, os->obs_point_id, &cookie);
+ compose_sample_action(ctx->ofproto, ctx->odp_actions, &ctx->flow,
+ probability, &cookie, sizeof cookie.flow_sample);
+}
+
static bool
may_receive(const struct ofport_dpif *port, struct action_xlate_ctx *ctx)
{
tunnel_ecn_ok(struct action_xlate_ctx *ctx)
{
if (is_ip_any(&ctx->base_flow)
- && (ctx->base_flow.tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) {
+ && (ctx->flow.tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) {
if ((ctx->base_flow.nw_tos & IP_ECN_MASK) == IP_ECN_NOT_ECT) {
VLOG_WARN_RL(&rl, "dropping tunnel packet marked ECN CE"
" but is not ECN capable");
}
break;
}
+
+ case OFPACT_SAMPLE:
+ xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
+ break;
}
}
struct rule_dpif *rule,
uint8_t tcp_flags, const struct ofpbuf *packet)
{
- ovs_be64 initial_tun_id = flow->tunnel.tun_id;
-
/* Flow initialization rules:
* - 'base_flow' must match the kernel's view of the packet at the
* time that action processing starts. 'flow' represents any
* to another device without any modifications this will cause us to
* insert a new tag since the original one was stripped off by the
* VLAN device.
- * - Tunnel 'flow' is largely cleared when transitioning between
- * the input and output stages since it does not make sense to output
- * a packet with the exact headers that it was received with (i.e.
- * the destination IP is us). The one exception is the tun_id, which
- * is preserved to allow use in later resubmit lookups and loads into
- * registers.
+ * - Tunnel metadata as received is retained in 'flow'. This allows
+ * tunnel metadata matching also in later tables.
+ * Since a kernel action for setting the tunnel metadata will only be
+ * generated with actual tunnel output, changing the tunnel metadata
+ * values in 'flow' (such as tun_id) will only have effect with a later
+ * tunnel output action.
* - Tunnel 'base_flow' is completely cleared since that is what the
* kernel does. If we wish to maintain the original values an action
* needs to be generated. */
ctx->ofproto = ofproto;
ctx->flow = *flow;
- memset(&ctx->flow.tunnel, 0, sizeof ctx->flow.tunnel);
ctx->base_flow = ctx->flow;
- ctx->base_flow.vlan_tci = initial_vals->vlan_tci;
- ctx->base_flow.tunnel.ip_tos = initial_vals->tunnel_ip_tos;
- ctx->flow.tunnel.tun_id = initial_tun_id;
+ memset(&ctx->base_flow.tunnel, 0, sizeof ctx->base_flow.tunnel);
+ ctx->orig_tunnel_ip_dst = flow->tunnel.ip_dst;
ctx->rule = rule;
ctx->packet = packet;
ctx->may_learn = packet != NULL;
ctx->resubmit_hook = NULL;
ctx->report_hook = NULL;
ctx->resubmit_stats = NULL;
+
+ if (initial_vals) {
+ ctx->base_flow.vlan_tci = initial_vals->vlan_tci;
+ }
}
/* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 'ofpacts'
} else {
static struct vlog_rate_limit trace_rl = VLOG_RATE_LIMIT_INIT(1, 1);
struct initial_vals initial_vals;
+ size_t sample_actions_len;
uint32_t local_odp_port;
initial_vals.vlan_tci = ctx->base_flow.vlan_tci;
- initial_vals.tunnel_ip_tos = ctx->base_flow.tunnel.ip_tos;
add_sflow_action(ctx);
+ add_ipfix_action(ctx);
+ sample_actions_len = ctx->odp_actions->size;
if (tunnel_ecn_ok(ctx) && (!in_port || may_receive(in_port, ctx))) {
do_xlate_actions(ofpacts, ofpacts_len, ctx);
/* We've let OFPP_NORMAL and the learning action look at the
* packet, so drop it now if forwarding is disabled. */
if (in_port && !stp_forward_in_state(in_port->stp_state)) {
- ofpbuf_clear(ctx->odp_actions);
- add_sflow_action(ctx);
+ ctx->odp_actions->size = sample_actions_len;
}
}
dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
initial_vals.vlan_tci = flow->vlan_tci;
- initial_vals.tunnel_ip_tos = 0;
action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals, NULL,
packet_get_tcp_flags(packet, flow), packet);
ctx.resubmit_stats = &stats;
}
initial_vals.vlan_tci = flow.vlan_tci;
- initial_vals.tunnel_ip_tos = flow.tunnel.ip_tos;
}
/* Generate a packet, if requested. */
flow_extract(packet, priority, mark, NULL, in_port, &flow);
flow.tunnel.tun_id = tun_id;
initial_vals.vlan_tci = flow.vlan_tci;
- initial_vals.tunnel_ip_tos = flow.tunnel.ip_tos;
} else {
unixctl_command_reply_error(conn, "Bad command syntax");
goto exit;
case SLOW_STP:
ds_put_cstr(ds, "\n\t- Consists of STP packets.");
break;
+ case SLOW_BFD:
+ ds_put_cstr(ds, "\n\t- Consists of BFD packets.");
+ break;
case SLOW_IN_BAND:
ds_put_cstr(ds, "\n\t- Needs in-band special case "
"processing.");
{
const struct shash_node **ports;
int i;
+ struct avg_subfacet_rates lifetime;
+ unsigned long long int minutes;
+ const int min_ms = 60 * 1000; /* milliseconds in one minute. */
+
+ minutes = (time_msec() - ofproto->created) / min_ms;
+
+ if (minutes > 0) {
+ lifetime.add_rate = (double)ofproto->total_subfacet_add_count
+ / minutes;
+ lifetime.del_rate = (double)ofproto->total_subfacet_del_count
+ / minutes;
+ }else {
+ lifetime.add_rate = 0.0;
+ lifetime.del_rate = 0.0;
+ }
ds_put_format(ds, "%s (%s):\n", ofproto->up.name,
dpif_name(ofproto->backer->dpif));
ds_put_format(ds,
"\tlookups: hit:%"PRIu64" missed:%"PRIu64"\n",
ofproto->n_hit, ofproto->n_missed);
- ds_put_format(ds, "\tflows: %zu\n",
- hmap_count(&ofproto->subfacets));
+ ds_put_format(ds, "\tflows: cur: %zu, avg: %5.3f, max: %d,"
+ " life span: %llu(ms)\n",
+ hmap_count(&ofproto->subfacets),
+ avg_subfacet_count(ofproto),
+ ofproto->max_n_subfacet,
+ avg_subfacet_life_span(ofproto));
+ if (minutes >= 60) {
+ show_dp_rates(ds, "\t\thourly avg:", &ofproto->hourly);
+ }
+ if (minutes >= 60 * 24) {
+ show_dp_rates(ds, "\t\tdaily avg:", &ofproto->daily);
+ }
+ show_dp_rates(ds, "\t\toverall avg:", &lifetime);
ports = shash_sort(&ofproto->up.port_by_name);
for (i = 0; i < shash_count(&ofproto->up.port_by_name); i++) {
return OFPP_NONE;
}
}
+/* Returns the average life span of the subfacets deleted from 'ofproto'
+ * so far, or 0 if none have been deleted.  (Units are presumably
+ * milliseconds, matching time_msec() used elsewhere -- confirm against
+ * the site that accumulates 'total_subfacet_life_span'.) */
+static unsigned long long int
+avg_subfacet_life_span(const struct ofproto_dpif *ofproto)
+{
+ unsigned long long int dc;
+ unsigned long long int avg;
+
+ /* Total deletions = rolled-up total + the current period's count. */
+ dc = ofproto->total_subfacet_del_count + ofproto->subfacet_del_count;
+ avg = dc ? ofproto->total_subfacet_life_span / dc : 0;
+
+ return avg;
+}
+
+/* Returns the average number of subfacets in 'ofproto', averaged over
+ * the 'n_update_stats' statistics samples taken so far, or 0.0 if no
+ * samples have been recorded yet. */
+static double
+avg_subfacet_count(const struct ofproto_dpif *ofproto)
+{
+ double avg_c = 0.0;
+
+ if (ofproto->n_update_stats) {
+ avg_c = (double)ofproto->total_subfacet_count
+ / ofproto->n_update_stats;
+ }
+
+ return avg_c;
+}
+
+/* Appends to 'ds' one line showing the subfacet add and delete rates
+ * (per minute) from 'rates', prefixed by 'heading'. */
+static void
+show_dp_rates(struct ds *ds, const char *heading,
+ const struct avg_subfacet_rates *rates)
+{
+ ds_put_format(ds, "%s add rate: %5.3f/min, del rate: %5.3f/min\n",
+ heading, rates->add_rate, rates->del_rate);
+}
+
+/* Records a new high-water mark for the number of subfacets in
+ * 'ofproto' if the current count exceeds the previous maximum. */
+static void
+update_max_subfacet_count(struct ofproto_dpif *ofproto)
+{
+ ofproto->max_n_subfacet = MAX(ofproto->max_n_subfacet,
+ hmap_count(&ofproto->subfacets));
+}
+
+/* Compute exponentially weighted moving average, adding 'new' as the newest,
+ * most heavily weighted element. 'base' designates the rate of decay: after
+ * 'base' further updates, 'new''s weight in the EWMA decays to about 1/e
+ * (about .37).
+ *
+ * 'base' must be at least 1 ('base' == 1 simply replaces '*avg' with
+ * 'new'; 'base' == 0 would divide by zero). */
+static void
+exp_mavg(double *avg, int base, double new)
+{
+ *avg = (*avg * (base - 1) + new) / base;
+}
+
+/* Rolls the per-minute subfacet add/del counters of 'ofproto' into its
+ * hourly and daily exponentially weighted moving averages and into the
+ * lifetime totals.  Intended to be called periodically.
+ *
+ * NOTE(review): only one minute boundary is processed per call (an
+ * "if", not a "while"), so if more than a minute elapses between calls
+ * the extra minutes are caught up one call at a time -- confirm that
+ * callers invoke this frequently enough. */
+static void
+update_moving_averages(struct ofproto_dpif *ofproto)
+{
+ const int min_ms = 60 * 1000; /* milliseconds in one minute. */
+
+ /* Update hourly averages on the minute boundaries. */
+ if (time_msec() - ofproto->last_minute >= min_ms) {
+ exp_mavg(&ofproto->hourly.add_rate, 60, ofproto->subfacet_add_count);
+ exp_mavg(&ofproto->hourly.del_rate, 60, ofproto->subfacet_del_count);
+
+ /* Update daily averages on the hour boundaries. */
+ if ((ofproto->last_minute - ofproto->created) / min_ms % 60 == 59) {
+ exp_mavg(&ofproto->daily.add_rate, 24, ofproto->hourly.add_rate);
+ exp_mavg(&ofproto->daily.del_rate, 24, ofproto->hourly.del_rate);
+ }
+
+ /* Fold the finished minute into the lifetime totals and reset the
+ * per-minute counters. */
+ ofproto->total_subfacet_add_count += ofproto->subfacet_add_count;
+ ofproto->total_subfacet_del_count += ofproto->subfacet_del_count;
+ ofproto->subfacet_add_count = 0;
+ ofproto->subfacet_del_count = 0;
+ ofproto->last_minute += min_ms;
+ }
+}
static void
dpif_stats_update_hit_count(struct ofproto_dpif *ofproto, uint64_t delta)
set_netflow,
get_netflow_ids,
set_sflow,
+ set_ipfix,
set_cfm,
get_cfm_status,
+ set_bfd,
+ get_bfd_status,
set_stp,
get_stp_status,
set_stp_port,