Merge citrix branch into master.

[sliver-openvswitch.git] / vswitchd / bridge.c
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c

index 22fe19a..5494dea 100644 (file)
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -43,6 +43,7 @@
  #include "odp-util.h"
  #include "ofp-print.h"
  #include "ofpbuf.h"
+#include "ofproto/netflow.h"
  #include "ofproto/ofproto.h"
  #include "packets.h"
  #include "poll-loop.h"
@@ -131,6 +132,7 @@ struct port {
      tag_type active_iface_tag;  /* Tag for bcast flows. */
      tag_type no_ifaces_tag;     /* Tag for flows when all ifaces disabled. */
      int updelay, downdelay;     /* Delay before iface goes up/down, in ms. */
+    bool bond_compat_is_stale;  /* Need to call port_update_bond_compat()? */
  
      /* Port mirroring info. */
      mirror_mask_t src_mirrors;  /* Mirrors triggered when packet received. */
@@ -146,7 +148,7 @@ struct port {
  struct bridge {
      struct list node;           /* Node in global list of bridges. */
      char *name;                 /* User-specified arbitrary name. */
-    struct mac_learning *ml;    /* MAC learning table, or null not to learn. */
+    struct mac_learning *ml;    /* MAC learning table. */
      bool sent_config_request;   /* Successfully sent config request? */
      uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
  
@@ -193,6 +195,7 @@ enum { DP_MAX = 256 };
  static struct bridge *bridge_create(const char *name);
  static void bridge_destroy(struct bridge *);
  static struct bridge *bridge_lookup(const char *name);
+static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *);
  static int bridge_run_one(struct bridge *);
  static void bridge_reconfigure_one(struct bridge *);
  static void bridge_reconfigure_controller(struct bridge *);
@@ -215,6 +218,7 @@ static void bond_run(struct bridge *);
  static void bond_wait(struct bridge *);
  static void bond_rebalance_port(struct port *);
  static void bond_send_learning_packets(struct port *);
+static void bond_enable_slave(struct iface *iface, bool enable);
  
  static void port_create(struct bridge *, const char *name);
  static void port_reconfigure(struct port *);
@@ -243,6 +247,8 @@ static void iface_destroy(struct iface *);
  static struct iface *iface_lookup(const struct bridge *, const char *name);
  static struct iface *iface_from_dp_ifidx(const struct bridge *,
                                           uint16_t dp_ifidx);
+static bool iface_is_internal(const struct bridge *, const char *name);
+static void iface_set_mac(struct iface *);
  
  /* Hooks into ofproto processing. */
  static struct ofhooks bridge_ofhooks;
@@ -310,6 +316,9 @@ bridge_init(void)
              dpif_close(dpif);
          }
      }
+    svec_destroy(&dpif_names);
+
+    unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows);
  
      bond_init();
      bridge_reconfigure();
@@ -396,12 +405,23 @@ check_iface_dp_ifidx(struct bridge *br, struct iface *iface, void *aux UNUSED)
  }
  
  static bool
-set_iface_policing(struct bridge *br UNUSED, struct iface *iface,
+set_iface_properties(struct bridge *br UNUSED, struct iface *iface,
                     void *aux UNUSED)
  {
-    int rate = cfg_get_int(0, "port.%s.ingress.policing-rate", iface->name);
-    int burst = cfg_get_int(0, "port.%s.ingress.policing-burst", iface->name);
+    int rate, burst;
+
+    /* Set policing attributes. */
+    rate = cfg_get_int(0, "port.%s.ingress.policing-rate", iface->name);
+    burst = cfg_get_int(0, "port.%s.ingress.policing-burst", iface->name);
      netdev_set_policing(iface->netdev, rate, burst);
+
+    /* Set MAC address of internal interfaces other than the local
+     * interface. */
+    if (iface->dp_ifidx != ODPP_LOCAL
+        && iface_is_internal(br, iface->name)) {
+        iface_set_mac(iface);
+    }
+
      return true;
  }
  
@@ -526,10 +546,14 @@ bridge_reconfigure(void)
  
          for (i = 0; i < add_ifaces.n; i++) {
              const char *if_name = add_ifaces.names[i];
-            int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
-            int flags = internal ? ODP_PORT_INTERNAL : 0;
-            int error = dpif_port_add(br->dpif, if_name, flags, NULL);
-            if (error == EXFULL) {
+            bool internal;
+            int error;
+
+            /* Add to datapath. */
+            internal = iface_is_internal(br, if_name);
+            error = dpif_port_add(br->dpif, if_name,
+                                  internal ? ODP_PORT_INTERNAL : 0, NULL);
+            if (error == EFBIG) {
                  VLOG_ERR("ran out of valid port numbers on %s",
                           dpif_name(br->dpif));
                  break;
@@ -547,9 +571,7 @@ bridge_reconfigure(void)
          uint64_t dpid;
          struct iface *local_iface;
          struct iface *hw_addr_iface;
-        uint8_t engine_type, engine_id;
-        bool add_id_to_iface = false;
-        struct svec nf_hosts;
+        struct netflow_options nf_options;
  
          bridge_fetch_dp_ifaces(br);
          iterate_and_prune_ifaces(br, init_iface_netdev, NULL);
@@ -573,35 +595,46 @@ bridge_reconfigure(void)
          ofproto_set_datapath_id(br->ofproto, dpid);
  
          /* Set NetFlow configuration on this bridge. */
-        dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id);
+        memset(&nf_options, 0, sizeof nf_options);
+        dpif_get_netflow_ids(br->dpif, &nf_options.engine_type,
+                             &nf_options.engine_id);
+        nf_options.active_timeout = -1;
+
          if (cfg_has("netflow.%s.engine-type", br->name)) {
-            engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
+            nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
                      br->name);
          }
          if (cfg_has("netflow.%s.engine-id", br->name)) {
-            engine_id = cfg_get_int(0, "netflow.%s.engine-id", br->name);
+            nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id",
+                                               br->name);
+        }
+        if (cfg_has("netflow.%s.active-timeout", br->name)) {
+            nf_options.active_timeout = cfg_get_int(0,
+                                                    "netflow.%s.active-timeout",
+                                                    br->name);
          }
          if (cfg_has("netflow.%s.add-id-to-iface", br->name)) {
-            add_id_to_iface = cfg_get_bool(0, "netflow.%s.add-id-to-iface",
-                    br->name);
+            nf_options.add_id_to_iface = cfg_get_bool(0,
+                                                   "netflow.%s.add-id-to-iface",
+                                                    br->name);
          }
-        if (add_id_to_iface && engine_id > 0x7f) {
+        if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) {
              VLOG_WARN("bridge %s: netflow port mangling may conflict with "
                      "another vswitch, choose an engine id less than 128", 
                      br->name);
          }
-        if (add_id_to_iface && br->n_ports > 0x1ff) {
+        if (nf_options.add_id_to_iface && br->n_ports > 508) {
              VLOG_WARN("bridge %s: netflow port mangling will conflict with "
-                    "another port when 512 or more ports are used", 
+                    "another port when more than 508 ports are used", 
                      br->name);
          }
-        svec_init(&nf_hosts);
-        cfg_get_all_keys(&nf_hosts, "netflow.%s.host", br->name);
-        if (ofproto_set_netflow(br->ofproto, &nf_hosts,  engine_type, 
-                    engine_id, add_id_to_iface)) {
+        svec_init(&nf_options.collectors);
+        cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name);
+        if (ofproto_set_netflow(br->ofproto, &nf_options)) {
              VLOG_ERR("bridge %s: problem setting netflow collectors", 
                      br->name);
          }
+        svec_destroy(&nf_options.collectors);
  
          /* Update the controller and related settings.  It would be more
           * straightforward to call this from bridge_reconfigure_one(), but we
@@ -617,13 +650,14 @@ bridge_reconfigure(void)
      LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
          for (i = 0; i < br->n_ports; i++) {
              struct port *port = br->ports[i];
+
              port_update_vlan_compat(port);
              port_update_bonding(port);
          }
      }
      LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
          brstp_reconfigure(br);
-        iterate_and_prune_ifaces(br, set_iface_policing, NULL);
+        iterate_and_prune_ifaces(br, set_iface_properties, NULL);
      }
  }
  
@@ -657,31 +691,75 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN],
      memset(ea, 0xff, sizeof ea);
      for (i = 0; i < br->n_ports; i++) {
          struct port *port = br->ports[i];
+        uint8_t iface_ea[ETH_ADDR_LEN];
+        uint64_t iface_ea_u64;
+        struct iface *iface;
+
+        /* Mirror output ports don't participate. */
          if (port->is_mirror_output_port) {
              continue;
          }
-        for (j = 0; j < port->n_ifaces; j++) {
-            struct iface *iface = port->ifaces[j];
-            uint8_t iface_ea[ETH_ADDR_LEN];
+
+        /* Choose the MAC address to represent the port. */
+        iface_ea_u64 = cfg_get_mac(0, "port.%s.mac", port->name);
+        if (iface_ea_u64) {
+            /* User specified explicitly. */
+            eth_addr_from_uint64(iface_ea_u64, iface_ea);
+
+            /* Find the interface with this Ethernet address (if any) so that
+             * we can provide the correct devname to the caller. */
+            iface = NULL;
+            for (j = 0; j < port->n_ifaces; j++) {
+                struct iface *candidate = port->ifaces[j];
+                uint8_t candidate_ea[ETH_ADDR_LEN];
+                if (!netdev_get_etheraddr(candidate->netdev, candidate_ea)
+                    && eth_addr_equals(iface_ea, candidate_ea)) {
+                    iface = candidate;
+                }
+            }
+        } else {
+            /* Choose the interface whose MAC address will represent the port.
+             * The Linux kernel bonding code always chooses the MAC address of
+             * the first slave added to a bond, and the Fedora networking
+             * scripts always add slaves to a bond in alphabetical order, so
+             * for compatibility we choose the interface with the name that is
+             * first in alphabetical order. */
+            iface = port->ifaces[0];
+            for (j = 1; j < port->n_ifaces; j++) {
+                struct iface *candidate = port->ifaces[j];
+                if (strcmp(candidate->name, iface->name) < 0) {
+                    iface = candidate;
+                }
+            }
+
+            /* The local port doesn't count (since we're trying to choose its
+             * MAC address anyway).  Other internal ports don't count because
+             * we really want a physical MAC if we can get it, and internal
+             * ports typically have randomly generated MACs. */
              if (iface->dp_ifidx == ODPP_LOCAL
                  || cfg_get_bool(0, "iface.%s.internal", iface->name)) {
                  continue;
              }
+
+            /* Grab MAC. */
              error = netdev_get_etheraddr(iface->netdev, iface_ea);
-            if (!error) {
-                if (!eth_addr_is_multicast(iface_ea) &&
-                    !eth_addr_is_reserved(iface_ea) &&
-                    !eth_addr_is_zero(iface_ea) &&
-                    memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) {
-                    memcpy(ea, iface_ea, ETH_ADDR_LEN);
-                    *hw_addr_iface = iface;
-                }
-            } else {
+            if (error) {
                  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
                  VLOG_ERR_RL(&rl, "failed to obtain Ethernet address of %s: %s",
                              iface->name, strerror(error));
+                continue;
              }
          }
+
+        /* Compare against our current choice. */
+        if (!eth_addr_is_multicast(iface_ea) &&
+            !eth_addr_is_reserved(iface_ea) &&
+            !eth_addr_is_zero(iface_ea) &&
+            memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0)
+        {
+            memcpy(ea, iface_ea, ETH_ADDR_LEN);
+            *hw_addr_iface = iface;
+        }
      }
      if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) {
          memcpy(ea, br->default_ea, ETH_ADDR_LEN);
@@ -816,9 +894,7 @@ bridge_wait(void)
              continue;
          }
  
-        if (br->ml) {
-            mac_learning_wait(br->ml);
-        }
+        mac_learning_wait(br->ml);
          bond_wait(br);
          brstp_wait(br);
      }
@@ -831,9 +907,7 @@ bridge_flush(struct bridge *br)
  {
      COVERAGE_INC(bridge_flush);
      br->flush = true;
-    if (br->ml) {
-        mac_learning_flush(br->ml);
-    }
+    mac_learning_flush(br->ml);
  }
  
  /* Returns the 'br' interface for the ODPP_LOCAL port, or null if 'br' has no
@@ -862,6 +936,7 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
      const struct bridge *br;
+    const struct mac_entry *e;
  
      br = bridge_lookup(args);
      if (!br) {
@@ -870,16 +945,13 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
      }
  
      ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
-    if (br->ml) {
-        const struct mac_entry *e;
-        LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
-            if (e->port < 0 || e->port >= br->n_ports) {
-                continue;
-            }
-            ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
-                          br->ports[e->port]->ifaces[0]->dp_ifidx,
-                          e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
+    LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+        if (e->port < 0 || e->port >= br->n_ports) {
+            continue;
          }
+        ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
+                      br->ports[e->port]->ifaces[0]->dp_ifidx,
+                      e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
      }
      unixctl_command_reply(conn, 200, ds_cstr(&ds));
      ds_destroy(&ds);
@@ -990,6 +1062,27 @@ bridge_get_datapathid(const char *name)
      return br ? ofproto_get_datapath_id(br->ofproto) : 0;
  }
  
+/* Handle requests for a listing of all flows known by the OpenFlow
+ * stack, including those normally hidden. */
+static void
+bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args)
+{
+    struct bridge *br;
+    struct ds results;
+    
+    br = bridge_lookup(args);
+    if (!br) {
+        unixctl_command_reply(conn, 501, "Unknown bridge");
+        return;
+    }
+
+    ds_init(&results);
+    ofproto_get_all_flows(br->ofproto, &results);
+
+    unixctl_command_reply(conn, 200, ds_cstr(&results));
+    ds_destroy(&results);
+}
+
  static int
  bridge_run_one(struct bridge *br)
  {
@@ -1000,9 +1093,7 @@ bridge_run_one(struct bridge *br)
          return error;
      }
  
-    if (br->ml) {
-        mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
-    }
+    mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
      bond_run(br);
      brstp_run(br);
  
@@ -1236,7 +1327,7 @@ bridge_reconfigure_controller(struct bridge *br)
          if (probe < 5) {
              probe = cfg_get_int(0, "mgmt.inactivity-probe");
              if (probe < 5) {
-                probe = 15;
+                probe = 5;
              }
          }
          ofproto_set_probe_interval(br->ofproto, probe);
@@ -1245,7 +1336,7 @@ bridge_reconfigure_controller(struct bridge *br)
          if (!max_backoff) {
              max_backoff = cfg_get_int(0, "mgmt.max-backoff");
              if (!max_backoff) {
-                max_backoff = 15;
+                max_backoff = 8;
              }
          }
          ofproto_set_max_backoff(br->ofproto, max_backoff);
@@ -1316,9 +1407,12 @@ bridge_get_all_ifaces(const struct bridge *br, struct svec *ifaces)
              struct iface *iface = port->ifaces[j];
              svec_add(ifaces, iface->name);
          }
+        if (port->n_ifaces > 1
+            && cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+            svec_add(ifaces, port->name);
+        }
      }
-    svec_sort(ifaces);
-    assert(svec_is_unique(ifaces));
+    svec_sort_unique(ifaces);
  }
  
  /* For robustness, in case the administrator moves around datapath ports behind
@@ -1383,13 +1477,31 @@ lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN])
  static int
  bond_choose_iface(const struct port *port)
  {
-    size_t i;
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    size_t i, best_down_slave = -1;
+    long long next_delay_expiration = LLONG_MAX;
+
      for (i = 0; i < port->n_ifaces; i++) {
-        if (port->ifaces[i]->enabled) {
+        struct iface *iface = port->ifaces[i];
+
+        if (iface->enabled) {
              return i;
+        } else if (iface->delay_expires < next_delay_expiration) {
+            best_down_slave = i;
+            next_delay_expiration = iface->delay_expires;
          }
      }
-    return -1;
+
+    if (best_down_slave != -1) {
+        struct iface *iface = port->ifaces[best_down_slave];
+
+        VLOG_INFO_RL(&rl, "interface %s: skipping remaining %lli ms updelay "
+                     "since no other interface is up", iface->name,
+                     iface->delay_expires - time_msec());
+        bond_enable_slave(iface, true);
+    }
+
+    return best_down_slave;
  }
  
  static bool
@@ -1413,6 +1525,7 @@ choose_output_iface(const struct port *port, const uint8_t *dl_src,
                  return false;
              }
              e->iface_tag = tag_create_random();
+            ((struct port *) port)->bond_compat_is_stale = true;
          }
          *tags |= e->iface_tag;
          iface = port->ifaces[e->iface_idx];
@@ -1438,10 +1551,12 @@ bond_link_status_update(struct iface *iface, bool carrier)
          iface->delay_expires = LLONG_MAX;
          VLOG_INFO_RL(&rl, "interface %s: will not be %s",
                       iface->name, carrier ? "disabled" : "enabled");
-    } else if (carrier && port->updelay && port->active_iface < 0) {
-        iface->delay_expires = time_msec();
-        VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
-                     "other interface is up", iface->name, port->updelay);
+    } else if (carrier && port->active_iface < 0) {
+        bond_enable_slave(iface, true);
+        if (port->updelay) {
+            VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+                         "other interface is up", iface->name, port->updelay);
+        }
      } else {
          int delay = carrier ? port->updelay : port->downdelay;
          iface->delay_expires = time_msec() + delay;
@@ -1478,6 +1593,12 @@ bond_enable_slave(struct iface *iface, bool enable)
      struct port *port = iface->port;
      struct bridge *br = port->bridge;
  
+    /* This acts as a recursion check.  If the act of disabling a slave
+     * causes a different slave to be enabled, the flag will allow us to
+     * skip redundant work when we reenter this function.  It must be
+     * cleared on exit to keep things safe with multiple bonds. */
+    static bool moving_active_iface = false;
+
      iface->delay_expires = LLONG_MAX;
      if (enable == iface->enabled) {
          return;
@@ -1490,18 +1611,29 @@ bond_enable_slave(struct iface *iface, bool enable)
          if (iface->port_ifidx == port->active_iface) {
              ofproto_revalidate(br->ofproto,
                                 port->active_iface_tag);
+
+            /* Disabling a slave can lead to another slave being immediately
+             * enabled if there will be no active slaves but one is waiting
+             * on an updelay.  In this case we do not need to run most of the
+             * code for the newly enabled slave since there was no period
+             * without an active slave and it is redundant with the disabling
+             * path. */
+            moving_active_iface = true;
              bond_choose_active_iface(port);
          }
          bond_send_learning_packets(port);
      } else {
          VLOG_WARN("interface %s: enabled", iface->name);
-        if (port->active_iface < 0) {
+        if (port->active_iface < 0 && !moving_active_iface) {
              ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
              bond_choose_active_iface(port);
              bond_send_learning_packets(port);
          }
          iface->tag = tag_create_random();
      }
+
+    moving_active_iface = false;
+    port->bond_compat_is_stale = true;
  }
  
  static void
@@ -1511,15 +1643,20 @@ bond_run(struct bridge *br)
  
      for (i = 0; i < br->n_ports; i++) {
          struct port *port = br->ports[i];
-        if (port->n_ifaces < 2) {
-            continue;
-        }
-        for (j = 0; j < port->n_ifaces; j++) {
-            struct iface *iface = port->ifaces[j];
-            if (time_msec() >= iface->delay_expires) {
-                bond_enable_slave(iface, !iface->enabled);
+
+        if (port->n_ifaces >= 2) {
+            for (j = 0; j < port->n_ifaces; j++) {
+                struct iface *iface = port->ifaces[j];
+                if (time_msec() >= iface->delay_expires) {
+                    bond_enable_slave(iface, !iface->enabled);
+                }
              }
          }
+
+        if (port->bond_compat_is_stale) {
+            port->bond_compat_is_stale = false;
+            port_update_bond_compat(port);
+        }
      }
  }
  
@@ -1645,7 +1782,7 @@ port_includes_vlan(const struct port *port, uint16_t vlan)
  static size_t
  compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
               const struct port *in_port, const struct port *out_port,
-             struct dst dsts[], tag_type *tags)
+             struct dst dsts[], tag_type *tags, uint16_t *nf_output_iface)
  {
      mirror_mask_t mirrors = in_port->src_mirrors;
      struct dst *dst = dsts;
@@ -1664,7 +1801,9 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
                  dst++;
              }
          }
+        *nf_output_iface = NF_OUT_FLOOD;
      } else if (out_port && set_dst(dst, flow, in_port, out_port, tags)) {
+        *nf_output_iface = dst->dp_ifidx;
          mirrors |= out_port->dst_mirrors;
          dst++;
      }
@@ -1681,14 +1820,28 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
                  for (i = 0; i < br->n_ports; i++) {
                      struct port *port = br->ports[i];
                      if (port_includes_vlan(port, m->out_vlan)
-                        && set_dst(dst, flow, in_port, port, tags)
-                        && !dst_is_duplicate(dsts, dst - dsts, dst))
+                        && set_dst(dst, flow, in_port, port, tags))
                      {
+                        int flow_vlan;
+
                          if (port->vlan < 0) {
                              dst->vlan = m->out_vlan;
                          }
-                        if (dst->dp_ifidx == flow->in_port
-                            && dst->vlan == vlan) {
+                        if (dst_is_duplicate(dsts, dst - dsts, dst)) {
+                            continue;
+                        }
+
+                        /* Use the vlan tag on the original flow instead of
+                         * the one passed in the vlan parameter.  This ensures
+                         * that we compare the vlan from before any implicit
+                         * tagging tags place. This is necessary because
+                         * dst->vlan is the final vlan, after removing implicit
+                         * tags. */
+                        flow_vlan = ntohs(flow->dl_vlan);
+                        if (flow_vlan == 0) {
+                            flow_vlan = OFP_VLAN_NONE;
+                        }
+                        if (port == in_port && dst->vlan == flow_vlan) {
                              /* Don't send out input port on same VLAN. */
                              continue;
                          }
@@ -1718,14 +1871,16 @@ print_dsts(const struct dst *dsts, size_t n)
  static void
  compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan,
                  const struct port *in_port, const struct port *out_port,
-                tag_type *tags, struct odp_actions *actions)
+                tag_type *tags, struct odp_actions *actions,
+                uint16_t *nf_output_iface)
  {
      struct dst dsts[DP_MAX_PORTS * (MAX_MIRRORS + 1)];
      size_t n_dsts;
      const struct dst *p;
      uint16_t cur_vlan;
  
-    n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags);
+    n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags,
+                          nf_output_iface);
  
      cur_vlan = ntohs(flow->dl_vlan);
      for (p = dsts; p < &dsts[n_dsts]; p++) {
@@ -1744,14 +1899,77 @@ compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan,
      }
  }
  
+/* Returns the effective vlan of a packet, taking into account both the
+ * 802.1Q header and implicitly tagged ports.  A value of 0 indicates that
+ * the packet is untagged and -1 indicates it has an invalid header and
+ * should be dropped. */
+static int flow_get_vlan(struct bridge *br, const flow_t *flow,
+                         struct port *in_port, bool have_packet)
+{
+    /* Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
+     * belongs to VLAN 0, so we should treat both cases identically.  (In the
+     * former case, the packet has an 802.1Q header that specifies VLAN 0,
+     * presumably to allow a priority to be specified.  In the latter case, the
+     * packet does not have any 802.1Q header.) */
+    int vlan = ntohs(flow->dl_vlan);
+    if (vlan == OFP_VLAN_NONE) {
+        vlan = 0;
+    }
+    if (in_port->vlan >= 0) {
+        if (vlan) {
+            /* XXX support double tagging? */
+            if (have_packet) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
+                             "packet received on port %s configured with "
+                             "implicit VLAN %"PRIu16,
+                             br->name, ntohs(flow->dl_vlan),
+                             in_port->name, in_port->vlan);
+            }
+            return -1;
+        }
+        vlan = in_port->vlan;
+    } else {
+        if (!port_includes_vlan(in_port, vlan)) {
+            if (have_packet) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
+                             "packet received on port %s not configured for "
+                             "trunking VLAN %d",
+                             br->name, vlan, in_port->name, vlan);
+            }
+            return -1;
+        }
+    }
+
+    return vlan;
+}
+
+static void
+update_learning_table(struct bridge *br, const flow_t *flow, int vlan,
+                      struct port *in_port)
+{
+    tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
+                                          vlan, in_port->port_idx);
+    if (rev_tag) {
+        /* The log messages here could actually be useful in debugging,
+         * so keep the rate limit relatively high. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
+                                                                300);
+        VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
+                    "on port %s in VLAN %d",
+                    br->name, ETH_ADDR_ARGS(flow->dl_src),
+                    in_port->name, vlan);
+        ofproto_revalidate(br->ofproto, rev_tag);
+    }
+}
+
  static bool
-is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *packet)
+is_bcast_arp_reply(const flow_t *flow)
  {
-    struct arp_eth_header *arp = (struct arp_eth_header *) packet->data;
      return (flow->dl_type == htons(ETH_TYPE_ARP)
-            && eth_addr_is_broadcast(flow->dl_dst)
-            && packet->size >= sizeof(struct arp_eth_header)
-            && arp->ar_op == ARP_OP_REQUEST);
+            && flow->nw_proto == ARP_OP_REPLY
+            && eth_addr_is_broadcast(flow->dl_dst));
  }
  
  /* If the composed actions may be applied to any packet in the given 'flow',
@@ -1760,12 +1978,13 @@ is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *packet)
  static bool
  process_flow(struct bridge *br, const flow_t *flow,
               const struct ofpbuf *packet, struct odp_actions *actions,
-             tag_type *tags)
+             tag_type *tags, uint16_t *nf_output_iface)
  {
      struct iface *in_iface;
      struct port *in_port;
      struct port *out_port = NULL; /* By default, drop the packet/flow. */
      int vlan;
+    int out_port_idx;
  
      /* Find the interface and port structure for the received packet. */
      in_iface = iface_from_dp_ifidx(br, flow->in_port);
@@ -1793,41 +2012,9 @@ process_flow(struct bridge *br, const flow_t *flow,
          return true;
      }
      in_port = in_iface->port;
-
-    /* Figure out what VLAN this packet belongs to.
-     *
-     * Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
-     * belongs to VLAN 0, so we should treat both cases identically.  (In the
-     * former case, the packet has an 802.1Q header that specifies VLAN 0,
-     * presumably to allow a priority to be specified.  In the latter case, the
-     * packet does not have any 802.1Q header.) */
-    vlan = ntohs(flow->dl_vlan);
-    if (vlan == OFP_VLAN_NONE) {
-        vlan = 0;
-    }
-    if (in_port->vlan >= 0) {
-        if (vlan) {
-            /* XXX support double tagging? */
-            if (packet != NULL) {
-                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
-                             "packet received on port %s configured with "
-                             "implicit VLAN %"PRIu16,
-                             br->name, ntohs(flow->dl_vlan),
-                             in_port->name, in_port->vlan);
-            }
-            goto done;
-        }
-        vlan = in_port->vlan;
-    } else {
-        if (!port_includes_vlan(in_port, vlan)) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-            VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
-                         "packet received on port %s not configured for "
-                         "trunking VLAN %d",
-                         br->name, vlan, in_port->name, vlan);
-            goto done;
-        }
+    vlan = flow_get_vlan(br, flow, in_port, !!packet);
+    if (vlan < 0) {
+        goto done;
      }
  
      /* Drop frames for ports that STP wants entirely killed (both for
@@ -1851,63 +2038,48 @@ process_flow(struct bridge *br, const flow_t *flow,
          goto done;
      }
  
-    /* Multicast (and broadcast) packets on bonds need special attention, to
-     * avoid receiving duplicates. */
-    if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) {
-        *tags |= in_port->active_iface_tag;
-        if (in_port->active_iface != in_iface->port_ifidx) {
-            /* Drop all multicast packets on inactive slaves. */
-            goto done;
-        } else {
-            /* Drop all multicast packets for which we have learned a different
-             * input port, because we probably sent the packet on one slaves
-             * and got it back on the active slave.  Broadcast ARP replies are
-             * an exception to this rule: the host has moved to another
-             * switch. */
-            int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
-            if (src_idx != -1 && src_idx != in_port->port_idx) {
-                if (packet) {
-                    if (!is_bcast_arp_reply(flow, packet)) {
-                        goto done;
-                    }
-                } else {
-                    /* No way to know whether it's an ARP reply, because the
-                     * flow entry doesn't include enough information and we
-                     * don't have a packet.  Punt. */
-                    return false;
-                }
+    /* Packets received on bonds need special attention to avoid duplicates. */
+    if (in_port->n_ifaces > 1) {
+        int src_idx;
+
+        if (eth_addr_is_multicast(flow->dl_dst)) {
+            *tags |= in_port->active_iface_tag;
+            if (in_port->active_iface != in_iface->port_ifidx) {
+                /* Drop all multicast packets on inactive slaves. */
+                goto done;
              }
          }
+
+        /* Drop all packets for which we have learned a different input
+         * port, because we probably sent the packet on one slave and got
+         * it back on the other.  Broadcast ARP replies are an exception
+         * to this rule: the host has moved to another switch. */
+        src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+        if (src_idx != -1 && src_idx != in_port->port_idx &&
+            !is_bcast_arp_reply(flow)) {
+                goto done;
+        }
      }
  
      /* MAC learning. */
      out_port = FLOOD_PORT;
-    if (br->ml) {
-        int out_port_idx;
-
-        /* Learn source MAC (but don't try to learn from revalidation). */
-        if (packet) {
-            tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
-                                                  vlan, in_port->port_idx);
-            if (rev_tag) {
-                /* The log messages here could actually be useful in debugging,
-                 * so keep the rate limit relatively high. */
-                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
-                                                                        300);
-                VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
-                            "on port %s in VLAN %d",
-                            br->name, ETH_ADDR_ARGS(flow->dl_src),
-                            in_port->name, vlan);
-                ofproto_revalidate(br->ofproto, rev_tag);
-            }
-        }
-
-        /* Determine output port. */
-        out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
-                                               tags);
-        if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
-            out_port = br->ports[out_port_idx];
-        }
+    /* Learn source MAC (but don't try to learn from revalidation). */
+    if (packet) {
+        update_learning_table(br, flow, vlan, in_port);
+    }
+
+    /* Determine output port. */
+    out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
+                                           tags);
+    if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
+        out_port = br->ports[out_port_idx];
+    } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) {
+        /* If we are revalidating but don't have a learning entry then
+         * eject the flow.  Installing a flow that floods packets opens
+         * up a window of time where we could learn from a packet reflected
+         * on a bond and blackhole packets before the learning table is
+         * updated to reflect the correct port. */
+        return false;
      }
  
      /* Don't send packets out their input ports.  Don't forward frames that STP
@@ -1917,17 +2089,10 @@ process_flow(struct bridge *br, const flow_t *flow,
      }
  
  done:
-    compose_actions(br, flow, vlan, in_port, out_port, tags, actions);
+    compose_actions(br, flow, vlan, in_port, out_port, tags, actions,
+                    nf_output_iface);
  
-    /*
-     * We send out only a single packet, instead of setting up a flow, if the
-     * packet is an ARP directed to broadcast that arrived on a bonded
-     * interface.  In such a situation ARP requests and replies must be handled
-     * differently, but OpenFlow unfortunately can't distinguish them.
-     */
-    return (in_port->n_ifaces < 2
-            || flow->dl_type != htons(ETH_TYPE_ARP)
-            || !eth_addr_is_broadcast(flow->dl_dst));
+    return true;
  }
  
  /* Careful: 'opp' is in host byte order and opp->port_no is an OFP port
@@ -1969,7 +2134,8 @@ bridge_port_changed_ofhook_cb(enum ofp_port_reason reason,
  
  static bool
  bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet,
-                        struct odp_actions *actions, tag_type *tags, void *br_)
+                        struct odp_actions *actions, tag_type *tags,
+                        uint16_t *nf_output_iface, void *br_)
  {
      struct bridge *br = br_;
  
@@ -1982,7 +2148,7 @@ bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet,
  #endif
  
      COVERAGE_INC(bridge_process_flow);
-    return process_flow(br, flow, packet, actions, tags);
+    return process_flow(br, flow, packet, actions, tags, nf_output_iface);
  }
  
  static void
@@ -1992,17 +2158,30 @@ bridge_account_flow_ofhook_cb(const flow_t *flow,
                                void *br_)
  {
      struct bridge *br = br_;
+    struct port *in_port;
      const union odp_action *a;
  
+    /* Feed information from the active flows back into the learning table
+     * to ensure that table is always in sync with what is actually flowing
+     * through the datapath. */
+    in_port = port_from_dp_ifidx(br, flow->in_port);
+    if (in_port) {
+        int vlan = flow_get_vlan(br, flow, in_port, false);
+         if (vlan >= 0) {
+            update_learning_table(br, flow, vlan, in_port);
+        }
+    }
+
      if (!br->has_bonded_ports) {
          return;
      }
  
      for (a = actions; a < &actions[n_actions]; a++) {
          if (a->type == ODPAT_OUTPUT) {
-            struct port *port = port_from_dp_ifidx(br, a->output.port);
-            if (port && port->n_ifaces >= 2) {
-                struct bond_entry *e = lookup_bond_entry(port, flow->dl_src);
+            struct port *out_port = port_from_dp_ifidx(br, a->output.port);
+            if (out_port && out_port->n_ifaces >= 2) {
+                struct bond_entry *e = lookup_bond_entry(out_port,
+                                                         flow->dl_src);
                  e->tx_bytes += n_bytes;
              }
          }
@@ -2157,8 +2336,9 @@ log_bals(const struct slave_balance *bals, size_t n_bals, struct port *port)
  /* Shifts 'hash' from 'from' to 'to' within 'port'. */
  static void
  bond_shift_load(struct slave_balance *from, struct slave_balance *to,
-                struct bond_entry *hash)
+                int hash_idx)
  {
+    struct bond_entry *hash = from->hashes[hash_idx];
      struct port *port = from->iface->port;
      uint64_t delta = hash->tx_bytes;
  
@@ -2176,12 +2356,11 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to,
       * it require more work, the only purpose it would be to allow that hash to
       * be migrated to another slave in this rebalancing run, and there is no
       * point in doing that.  */
-    if (from->hashes[0] == hash) {
+    if (hash_idx == 0) {
          from->hashes++;
      } else {
-        int i = hash - from->hashes[0];
-        memmove(from->hashes + i, from->hashes + i + 1,
-                (from->n_hashes - (i + 1)) * sizeof *from->hashes);
+        memmove(from->hashes + hash_idx, from->hashes + hash_idx + 1,
+                (from->n_hashes - (hash_idx + 1)) * sizeof *from->hashes);
      }
      from->n_hashes--;
  
@@ -2266,22 +2445,60 @@ bond_rebalance_port(struct port *port)
              /* 'from' is carrying significantly more load than 'to', and that
               * load is split across at least two different hashes.  Pick a hash
               * to migrate to 'to' (the least-loaded slave), given that doing so
-             * must not cause 'to''s load to exceed 'from''s load.
+             * must decrease the ratio of the load on the two slaves by at
+             * least 0.1.
               *
               * The sort order we use means that we prefer to shift away the
               * smallest hashes instead of the biggest ones.  There is little
               * reason behind this decision; we could use the opposite sort
               * order to shift away big hashes ahead of small ones. */
              size_t i;
+            bool order_swapped;
  
              for (i = 0; i < from->n_hashes; i++) {
+                double old_ratio, new_ratio;
                  uint64_t delta = from->hashes[i]->tx_bytes;
-                if (to->tx_bytes + delta < from->tx_bytes - delta) {
+
+                if (delta == 0 || from->tx_bytes - delta == 0) {
+                    /* Pointless move. */
+                    continue;
+                }
+
+                order_swapped = from->tx_bytes - delta < to->tx_bytes + delta;
+
+                if (to->tx_bytes == 0) {
+                    /* Nothing on the new slave, move it. */
+                    break;
+                }
+
+                old_ratio = (double)from->tx_bytes / to->tx_bytes;
+                new_ratio = (double)(from->tx_bytes - delta) /
+                            (to->tx_bytes + delta);
+
+                if (new_ratio == 0) {
+                    /* Should already be covered but check to prevent division
+                     * by zero. */
+                    continue;
+                }
+
+                if (new_ratio < 1) {
+                    new_ratio = 1 / new_ratio;
+                }
+
+                if (old_ratio - new_ratio > 0.1) {
+                    /* Would decrease the ratio, move it. */
                      break;
                  }
              }
              if (i < from->n_hashes) {
-                bond_shift_load(from, to, from->hashes[i]);
+                bond_shift_load(from, to, i);
+                port->bond_compat_is_stale = true;
+
+                /* If the result of the migration changed the relative order of
+                 * 'from' and 'to' swap them back to maintain invariants. */
+                if (order_swapped) {
+                    swap_bals(from, to);
+                }
  
                  /* Re-sort 'bals'.  Note that this may make 'from' and 'to'
                   * point to different slave_balance structures.  It is only
@@ -2310,17 +2527,14 @@ bond_send_learning_packets(struct port *port)
      struct ofpbuf packet;
      int error, n_packets, n_errors;
  
-    if (!port->n_ifaces || port->active_iface < 0 || !br->ml) {
+    if (!port->n_ifaces || port->active_iface < 0) {
          return;
      }
  
      ofpbuf_init(&packet, 128);
      error = n_packets = n_errors = 0;
      LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
-        static const char s[] = "Open vSwitch Bond Failover";
          union ofp_action actions[2], *a;
-        struct eth_header *eth;
-        struct llc_snap_header *llc_snap;
          uint16_t dp_ifidx;
          tag_type tags = 0;
          flow_t flow;
@@ -2331,23 +2545,6 @@ bond_send_learning_packets(struct port *port)
              continue;
          }
  
-        /* Compose packet to send. */
-        ofpbuf_clear(&packet);
-        eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
-        llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
-        ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */
-        ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN);
-
-        memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
-        memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN);
-        eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
-
-        llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
-        llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
-        llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
-        memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
-        llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
-
          /* Compose actions. */
          memset(actions, 0, sizeof actions);
          a = actions;
@@ -2364,6 +2561,8 @@ bond_send_learning_packets(struct port *port)
  
          /* Send packet. */
          n_packets++;
+        compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
+                              e->mac);
          flow_extract(&packet, ODPP_NONE, &flow);
          retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
                                       &packet);
@@ -2479,14 +2678,10 @@ bond_unixctl_show(struct unixctl_conn *conn, const char *args)
                  continue;
              }
  
-            ds_put_format(&ds, "\thash %d: %lld kB load\n",
+            ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n",
                            hash, be->tx_bytes / 1024);
  
              /* MACs. */
-            if (!port->bridge->ml) {
-                break;
-            }
-
              LIST_FOR_EACH (me, struct mac_entry, lru_node,
                             &port->bridge->ml->lrus) {
                  uint16_t dp_ifidx;
@@ -2558,6 +2753,7 @@ bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_)
      ofproto_revalidate(port->bridge->ofproto, entry->iface_tag);
      entry->iface_idx = iface->port_ifidx;
      entry->iface_tag = tag_create_random();
+    port->bond_compat_is_stale = true;
      unixctl_command_reply(conn, 200, "migrated");
  }
  
@@ -2653,6 +2849,25 @@ bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args)
      enable_slave(conn, args, false);
  }
  
+static void
+bond_unixctl_hash(struct unixctl_conn *conn, const char *args)
+{
+       uint8_t mac[ETH_ADDR_LEN];
+       uint8_t hash;
+       char *hash_cstr;
+
+       if (sscanf(args, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+           == ETH_ADDR_SCAN_COUNT) {
+               hash = bond_hash(mac);
+
+               hash_cstr = xasprintf("%u", hash);
+               unixctl_command_reply(conn, 200, hash_cstr);
+               free(hash_cstr);
+       } else {
+               unixctl_command_reply(conn, 501, "invalid mac");
+       }
+}
+
  static void
  bond_init(void)
  {
@@ -2663,6 +2878,7 @@ bond_init(void)
                               bond_unixctl_set_active_slave);
      unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave);
      unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave);
+    unixctl_command_register("bond/hash", bond_unixctl_hash);
  }
  \f
  /* Port functions. */
@@ -2822,6 +3038,7 @@ port_destroy(struct port *port)
          size_t i;
  
          proc_net_compat_update_vlan(port->name, NULL, 0);
+        proc_net_compat_update_bond(port->name, NULL);
  
          for (i = 0; i < MAX_MIRRORS; i++) {
              struct mirror *m = br->mirrors[i];
@@ -2888,7 +3105,7 @@ port_update_bonding(struct port *port)
          if (port->bond_hash) {
              free(port->bond_hash);
              port->bond_hash = NULL;
-            proc_net_compat_update_bond(port->name, NULL);
+            port->bond_compat_is_stale = true;
          }
      } else {
          if (!port->bond_hash) {
@@ -2903,36 +3120,79 @@ port_update_bonding(struct port *port)
              port->no_ifaces_tag = tag_create_random();
              bond_choose_active_iface(port);
          }
-        port_update_bond_compat(port);
+        port->bond_compat_is_stale = true;
      }
  }
  
  static void
  port_update_bond_compat(struct port *port)
  {
+    struct compat_bond_hash compat_hashes[BOND_MASK + 1];
      struct compat_bond bond;
      size_t i;
  
      if (port->n_ifaces < 2) {
+        proc_net_compat_update_bond(port->name, NULL);
          return;
      }
  
      bond.up = false;
      bond.updelay = port->updelay;
      bond.downdelay = port->downdelay;
+
+    bond.n_hashes = 0;
+    bond.hashes = compat_hashes;
+    if (port->bond_hash) {
+        const struct bond_entry *e;
+        for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) {
+            if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) {
+                struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++];
+                cbh->hash = e - port->bond_hash;
+                cbh->netdev_name = port->ifaces[e->iface_idx]->name;
+            }
+        }
+    }
+
      bond.n_slaves = port->n_ifaces;
      bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves);
      for (i = 0; i < port->n_ifaces; i++) {
          struct iface *iface = port->ifaces[i];
          struct compat_bond_slave *slave = &bond.slaves[i];
          slave->name = iface->name;
-        slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) ||
-                     (!iface->enabled && iface->delay_expires != LLONG_MAX));
+
+        /* We need to make the same determination as the Linux bonding
+         * code to determine whether a slave should be consider "up".
+         * The Linux function bond_miimon_inspect() supports four 
+         * BOND_LINK_* states:
+         *      
+         *    - BOND_LINK_UP: carrier detected, updelay has passed.
+         *    - BOND_LINK_FAIL: carrier lost, downdelay in progress.
+         *    - BOND_LINK_DOWN: carrier lost, downdelay has passed.
+         *    - BOND_LINK_BACK: carrier detected, updelay in progress.
+         *
+         * The function bond_info_show_slave() only considers BOND_LINK_UP 
+         * to be "up" and anything else to be "down".
+         */
+        slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
          if (slave->up) {
              bond.up = true;
          }
          netdev_get_etheraddr(iface->netdev, slave->mac);
      }
+
+    if (cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+        struct netdev *bond_netdev;
+
+        if (!netdev_open(port->name, NETDEV_ETH_TYPE_NONE, &bond_netdev)) {
+            if (bond.up) {
+                netdev_turn_flags_on(bond_netdev, NETDEV_UP, true);
+            } else {
+                netdev_turn_flags_off(bond_netdev, NETDEV_UP, true);
+            }
+            netdev_close(bond_netdev);
+        }
+    }
+
      proc_net_compat_update_bond(port->name, &bond);
      free(bond.slaves);
  }
@@ -3056,6 +3316,60 @@ iface_from_dp_ifidx(const struct bridge *br, uint16_t dp_ifidx)
  {
      return port_array_get(&br->ifaces, dp_ifidx);
  }
+
+/* Returns true if 'iface' is the name of an "internal" interface on bridge
+ * 'br', that is, an interface that is entirely simulated within the datapath.
+ * The local port (ODPP_LOCAL) is always an internal interface.  Other local
+ * interfaces are created by setting "iface.<iface>.internal = true".
+ *
+ * In addition, we have a kluge-y feature that creates an internal port with
+ * the name of a bonded port if "bonding.<bondname>.fake-iface = true" is set.
+ * This feature needs to go away in the long term.  Until then, this is one
+ * reason why this function takes a name instead of a struct iface: the fake
+ * interfaces created this way do not have a struct iface. */
+static bool
+iface_is_internal(const struct bridge *br, const char *iface)
+{
+    if (!strcmp(iface, br->name)
+        || cfg_get_bool(0, "iface.%s.internal", iface)) {
+        return true;
+    }
+
+    if (cfg_get_bool(0, "bonding.%s.fake-iface", iface)) {
+        struct port *port = port_lookup(br, iface);
+        if (port && port->n_ifaces > 1) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Set Ethernet address of 'iface', if one is specified in the configuration
+ * file. */
+static void
+iface_set_mac(struct iface *iface)
+{
+    uint64_t mac = cfg_get_mac(0, "iface.%s.mac", iface->name);
+    if (mac) {
+        static uint8_t ea[ETH_ADDR_LEN];
+
+        eth_addr_from_uint64(mac, ea);
+        if (eth_addr_is_multicast(ea)) {
+            VLOG_ERR("interface %s: cannot set MAC to multicast address",
+                     iface->name);
+        } else if (iface->dp_ifidx == ODPP_LOCAL) {
+            VLOG_ERR("ignoring iface.%s.mac; use bridge.%s.mac instead",
+                     iface->name, iface->name);
+        } else {
+            int error = netdev_set_etheraddr(iface->netdev, ea);
+            if (error) {
+                VLOG_ERR("interface %s: setting MAC failed (%s)",
+                         iface->name, strerror(error));
+            }
+        }
+    }
+}
  \f
  /* Port mirroring. */
  
@@ -3063,7 +3377,8 @@ static void
  mirror_reconfigure(struct bridge *br)
  {
      struct svec old_mirrors, new_mirrors;
-    size_t i;
+    size_t i, n_rspan_vlans;
+    unsigned long *rspan_vlans;
  
      /* Collect old and new mirrors. */
      svec_init(&old_mirrors);
@@ -3112,6 +3427,29 @@ mirror_reconfigure(struct bridge *br)
              m->out_port->is_mirror_output_port = true;
          }
      }
+
+    /* Update learning disabled vlans (for RSPAN). */
+    rspan_vlans = NULL;
+    n_rspan_vlans = cfg_count("vlan.%s.disable-learning", br->name);
+    if (n_rspan_vlans) {
+        rspan_vlans = bitmap_allocate(4096);
+
+        for (i = 0; i < n_rspan_vlans; i++) {
+            int vlan = cfg_get_vlan(i, "vlan.%s.disable-learning", br->name);
+            if (vlan >= 0) {
+                bitmap_set1(rspan_vlans, vlan);
+                VLOG_INFO("bridge %s: disabling learning on vlan %d\n",
+                          br->name, vlan);
+            } else {
+                VLOG_ERR("bridge %s: invalid value '%s' for learning disabled "
+                         "VLAN", br->name,
+                       cfg_get_string(i, "vlan.%s.disable-learning", br->name));
+            }
+        }
+    }
+    if (mac_learning_set_disabled_vlans(br->ml, rspan_vlans)) {
+        bridge_flush(br);
+    }
  }
  
  static void
@@ -3256,6 +3594,7 @@ mirror_reconfigure_one(struct mirror *m)
      int *vlans;
      size_t i;
      bool mirror_all_ports;
+    bool any_ports_specified;
  
      /* Get output port. */
      out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port",
@@ -3294,11 +3633,18 @@ mirror_reconfigure_one(struct mirror *m)
      cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx);
      cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx);
      cfg_get_all_keys(&ports, "%s.select.port", pfx);
+    any_ports_specified = src_ports.n || dst_ports.n || ports.n;
      svec_append(&src_ports, &ports);
      svec_append(&dst_ports, &ports);
      svec_destroy(&ports);
      prune_ports(m, &src_ports);
      prune_ports(m, &dst_ports);
+    if (any_ports_specified && !src_ports.n && !dst_ports.n) {
+        VLOG_ERR("%s: none of the specified ports exist; "
+                 "disabling port mirror %s", pfx, pfx);
+        mirror_destroy(m);
+        goto exit;
+    }
  
      /* Get all the vlans, and drop duplicate and invalid vlans. */
      svec_init(&vlan_strings);
@@ -3350,6 +3696,7 @@ mirror_reconfigure_one(struct mirror *m)
      }
  
      /* Clean up. */
+exit:
      svec_destroy(&src_ports);
      svec_destroy(&dst_ports);
      free(pfx);