Merge citrix branch into master.
[sliver-openvswitch.git] / vswitchd / bridge.c
index ff5d352..f977c2b 100644 (file)
@@ -1,28 +1,16 @@
 /* Copyright (c) 2008, 2009 Nicira Networks
- * 
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
  *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
  *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * In addition, as a special exception, Nicira Networks gives permission
- * to link the code of its release of vswitchd with the OpenSSL project's
- * "OpenSSL" library (or with modified versions of it that use the same
- * license as the "OpenSSL" library), and distribute the linked
- * executables.  You must obey the GNU General Public License in all
- * respects for all of the code used other than "OpenSSL".  If you modify
- * this file, you may extend this exception to your version of the file,
- * but you are not obligated to do so.  If you do not wish to do so,
- * delete this exception statement from your version.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #include <config.h>
@@ -39,6 +27,7 @@
 #include <strings.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
+#include <sys/types.h>
 #include <unistd.h>
 #include "bitmap.h"
 #include "cfg.h"
 #include "odp-util.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
+#include "ofproto/ofproto.h"
+#include "packets.h"
 #include "poll-loop.h"
 #include "port-array.h"
 #include "proc-net-compat.h"
 #include "process.h"
-#include "secchan/ofproto.h"
 #include "socket-util.h"
 #include "stp.h"
 #include "svec.h"
 #include "timeval.h"
 #include "util.h"
+#include "unixctl.h"
 #include "vconn.h"
 #include "vconn-ssl.h"
 #include "xenserver.h"
@@ -167,7 +158,7 @@ struct bridge {
     struct ofproto *ofproto;    /* OpenFlow switch. */
 
     /* Kernel datapath information. */
-    struct dpif dpif;           /* Kernel datapath. */
+    struct dpif *dpif;          /* Datapath. */
     struct port_array ifaces;   /* Indexed by kernel datapath port number. */
 
     /* Bridge ports. */
@@ -215,14 +206,19 @@ static uint64_t bridge_pick_datapath_id(struct bridge *,
                                         const char *devname);
 static uint64_t dpid_from_hash(const void *, size_t nbytes);
 
+static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args);
+
+static void bond_init(void);
 static void bond_run(struct bridge *);
 static void bond_wait(struct bridge *);
 static void bond_rebalance_port(struct port *);
+static void bond_send_learning_packets(struct port *);
 
 static void port_create(struct bridge *, const char *name);
 static void port_reconfigure(struct port *);
 static void port_destroy(struct port *);
 static struct port *port_lookup(const struct bridge *, const char *name);
+static struct iface *port_lookup_iface(const struct port *, const char *name);
 static struct port *port_from_dp_ifidx(const struct bridge *,
                                        uint16_t dp_ifidx);
 static void port_update_bond_compat(struct port *);
@@ -265,8 +261,8 @@ bridge_get_ifaces(struct svec *svec)
             for (j = 0; j < port->n_ifaces; j++) {
                 struct iface *iface = port->ifaces[j];
                 if (iface->dp_ifidx < 0) {
-                    VLOG_ERR("%s interface not in dp%u, ignoring",
-                             iface->name, dpif_id(&br->dpif));
+                    VLOG_ERR("%s interface not in datapath %s, ignoring",
+                             iface->name, dpif_name(br->dpif));
                 } else {
                     if (iface->dp_ifidx != ODPP_LOCAL) {
                         svec_add(svec, iface->name);
@@ -281,28 +277,37 @@ bridge_get_ifaces(struct svec *svec)
 void
 bridge_init(void)
 {
-    int retval;
-    int i;
+    struct svec dpif_names;
+    size_t i;
 
-    for (i = 0; i < DP_MAX; i++) {
-        struct dpif dpif;
-        char devname[16];
+    unixctl_command_register("fdb/show", bridge_unixctl_fdb_show);
 
-        sprintf(devname, "dp%d", i);
-        retval = dpif_open(devname, &dpif);
+    dp_enumerate(&dpif_names);
+    for (i = 0; i < dpif_names.n; i++) {
+        const char *dpif_name = dpif_names.names[i];
+        struct dpif *dpif;
+        int retval;
+
+        retval = dpif_open(dpif_name, &dpif);
         if (!retval) {
-            char dpif_name[IF_NAMESIZE];
-            if (dpif_get_name(&dpif, dpif_name, sizeof dpif_name)
-                || !cfg_has("bridge.%s.port", dpif_name)) {
-                dpif_delete(&dpif);
+            struct svec all_names;
+            size_t j;
+
+            svec_init(&all_names);
+            dpif_get_all_names(dpif, &all_names);
+            for (j = 0; j < all_names.n; j++) {
+                if (cfg_has("bridge.%s.port", all_names.names[j])) {
+                    goto found;
+                }
             }
-            dpif_close(&dpif);
-        } else if (retval != ENODEV) {
-            VLOG_ERR("failed to delete datapath dp%d: %s",
-                     i, strerror(retval));
+            dpif_delete(dpif);
+        found:
+            svec_destroy(&all_names);
+            dpif_close(dpif);
         }
     }
 
+    bond_init();
     bridge_reconfigure();
 }
 
@@ -328,6 +333,7 @@ bridge_configure_ssl(void)
     static char *private_key_file;
     static char *certificate_file;
     static char *cacert_file;
+    struct stat s;
 
     if (config_string_change("ssl.private-key", &private_key_file)) {
         vconn_ssl_set_private_key_file(private_key_file);
@@ -337,7 +343,13 @@ bridge_configure_ssl(void)
         vconn_ssl_set_certificate_file(certificate_file);
     }
 
-    if (config_string_change("ssl.ca-cert", &cacert_file)) {
+    /* We assume that even if the filename hasn't changed, if the CA cert
+     * file has been removed, we want to move back into bootstrapping
+     * mode.  This opens a small security hole, because the old
+     * certificate will still be trusted until vSwitch is restarted.  We
+     * may want to address this in vconn's SSL library. */
+    if (config_string_change("ssl.ca-cert", &cacert_file)
+        || (cacert_file && stat(cacert_file, &s) && errno == ENOENT)) {
         vconn_ssl_set_ca_cert_file(cacert_file,
                                    cfg_get_bool(0, "ssl.bootstrap-ca-cert"));
     }
@@ -347,33 +359,19 @@ bridge_configure_ssl(void)
 void
 bridge_reconfigure(void)
 {
-    struct svec old_br, new_br, raw_new_br;
+    struct svec old_br, new_br;
     struct bridge *br, *next;
     size_t i, j;
 
     COVERAGE_INC(bridge_reconfigure);
 
-    /* Collect old bridges. */
+    /* Collect old and new bridges. */
     svec_init(&old_br);
+    svec_init(&new_br);
     LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
         svec_add(&old_br, br->name);
     }
-
-    /* Collect new bridges. */
-    svec_init(&raw_new_br);
-    cfg_get_subsections(&raw_new_br, "bridge");
-    svec_init(&new_br);
-    for (i = 0; i < raw_new_br.n; i++) {
-        const char *name = raw_new_br.names[i];
-        if ((!strncmp(name, "dp", 2) && isdigit(name[2])) ||
-            (!strncmp(name, "nl:", 3) && isdigit(name[3]))) {
-            VLOG_ERR("%s is not a valid bridge name (bridges may not be "
-                     "named \"dp\" or \"nl:\" followed by a digit)", name);
-        } else {
-            svec_add(&new_br, name);
-        }
-    }
-    svec_destroy(&raw_new_br);
+    cfg_get_subsections(&new_br, "bridge");
 
     /* Get rid of deleted bridges and add new bridges. */
     svec_sort(&old_br);
@@ -414,16 +412,17 @@ bridge_reconfigure(void)
         size_t n_dpif_ports;
         struct svec want_ifaces;
 
-        dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+        dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
         bridge_get_all_ifaces(br, &want_ifaces);
         for (i = 0; i < n_dpif_ports; i++) {
             const struct odp_port *p = &dpif_ports[i];
             if (!svec_contains(&want_ifaces, p->devname)
                 && strcmp(p->devname, br->name)) {
-                int retval = dpif_port_del(&br->dpif, p->port);
+                int retval = dpif_port_del(br->dpif, p->port);
                 if (retval) {
-                    VLOG_ERR("failed to remove %s interface from dp%u: %s",
-                             p->devname, dpif_id(&br->dpif), strerror(retval));
+                    VLOG_ERR("failed to remove %s interface from %s: %s",
+                             p->devname, dpif_name(br->dpif),
+                             strerror(retval));
                 }
             }
         }
@@ -434,9 +433,8 @@ bridge_reconfigure(void)
         struct odp_port *dpif_ports;
         size_t n_dpif_ports;
         struct svec cur_ifaces, want_ifaces, add_ifaces;
-        int next_port_no;
 
-        dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+        dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
         svec_init(&cur_ifaces);
         for (i = 0; i < n_dpif_ports; i++) {
             svec_add(&cur_ifaces, dpif_ports[i].devname);
@@ -446,28 +444,20 @@ bridge_reconfigure(void)
         bridge_get_all_ifaces(br, &want_ifaces);
         svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL);
 
-        next_port_no = 1;
         for (i = 0; i < add_ifaces.n; i++) {
             const char *if_name = add_ifaces.names[i];
-            for (;;) {
-                int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
-                int error = dpif_port_add(&br->dpif, if_name, next_port_no++,
-                                          internal ? ODP_PORT_INTERNAL : 0);
-                if (error != EEXIST) {
-                    if (next_port_no >= 256) {
-                        VLOG_ERR("ran out of valid port numbers on dp%u",
-                                 dpif_id(&br->dpif));
-                        goto out;
-                    }
-                    if (error) {
-                        VLOG_ERR("failed to add %s interface to dp%u: %s",
-                                 if_name, dpif_id(&br->dpif), strerror(error));
-                    }
-                    break;
-                }
+            int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
+            int flags = internal ? ODP_PORT_INTERNAL : 0;
+            int error = dpif_port_add(br->dpif, if_name, flags, NULL);
+            if (error == EXFULL) {
+                VLOG_ERR("ran out of valid port numbers on %s",
+                         dpif_name(br->dpif));
+                break;
+            } else if (error) {
+                VLOG_ERR("failed to add %s interface to %s: %s",
+                         if_name, dpif_name(br->dpif), strerror(error));
             }
         }
-    out:
         svec_destroy(&cur_ifaces);
         svec_destroy(&want_ifaces);
         svec_destroy(&add_ifaces);
@@ -477,12 +467,10 @@ bridge_reconfigure(void)
         uint64_t dpid;
         struct iface *local_iface = NULL;
         const char *devname;
-        uint8_t engine_type = br->dpif.minor;
-        uint8_t engine_id = br->dpif.minor;
+        uint8_t engine_type, engine_id;
         bool add_id_to_iface = false;
         struct svec nf_hosts;
 
-
         bridge_fetch_dp_ifaces(br);
         for (i = 0; i < br->n_ports; ) {
             struct port *port = br->ports[i];
@@ -490,15 +478,16 @@ bridge_reconfigure(void)
             for (j = 0; j < port->n_ifaces; ) {
                 struct iface *iface = port->ifaces[j];
                 if (iface->dp_ifidx < 0) {
-                    VLOG_ERR("%s interface not in dp%u, dropping",
-                             iface->name, dpif_id(&br->dpif));
+                    VLOG_ERR("%s interface not in %s, dropping",
+                             iface->name, dpif_name(br->dpif));
                     iface_destroy(iface);
                 } else {
                     if (iface->dp_ifidx == ODPP_LOCAL) {
                         local_iface = iface;
                     }
-                    VLOG_DBG("dp%u has interface %s on port %d",
-                             dpif_id(&br->dpif), iface->name, iface->dp_ifidx);
+                    VLOG_DBG("%s has interface %s on port %d",
+                             dpif_name(br->dpif),
+                             iface->name, iface->dp_ifidx);
                     j++;
                 }
             }
@@ -526,6 +515,7 @@ bridge_reconfigure(void)
         ofproto_set_datapath_id(br->ofproto, dpid);
 
         /* Set NetFlow configuration on this bridge. */
+        dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id);
         if (cfg_has("netflow.%s.engine-type", br->name)) {
             engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
                     br->name);
@@ -726,10 +716,10 @@ bridge_pick_datapath_id(struct bridge *br,
 static uint64_t
 dpid_from_hash(const void *data, size_t n)
 {
-    uint8_t hash[SHA1HashSize];
+    uint8_t hash[SHA1_DIGEST_SIZE];
 
     BUILD_ASSERT_DECL(sizeof hash >= ETH_ADDR_LEN);
-    SHA1Bytes(data, n, hash);
+    sha1_bytes(data, n, hash);
     eth_addr_mark_random(hash);
     return eth_addr_to_uint64(hash);
 }
@@ -786,6 +776,32 @@ bridge_flush(struct bridge *br)
     }
 }
 \f
+/* Bridge unixctl user interface functions. */
+static void
+bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    const struct bridge *br;
+
+    br = bridge_lookup(args);
+    if (!br) {
+        unixctl_command_reply(conn, 501, "no such bridge");
+        return;
+    }
+
+    ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
+    if (br->ml) {
+        const struct mac_entry *e;
+        LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+            ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
+                          e->port, e->vlan, ETH_ADDR_ARGS(e->mac),
+                          mac_entry_age(e));
+        }
+    }
+    unixctl_command_reply(conn, 200, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+\f
 /* Bridge reconfiguration functions. */
 
 static struct bridge *
@@ -798,7 +814,7 @@ bridge_create(const char *name)
     br = xcalloc(1, sizeof *br);
 
     error = dpif_create(name, &br->dpif);
-    if (error == EEXIST) {
+    if (error == EEXIST || error == EBUSY) {
         error = dpif_open(name, &br->dpif);
         if (error) {
             VLOG_ERR("datapath %s already exists but cannot be opened: %s",
@@ -806,7 +822,7 @@ bridge_create(const char *name)
             free(br);
             return NULL;
         }
-        dpif_flow_flush(&br->dpif);
+        dpif_flow_flush(br->dpif);
     } else if (error) {
         VLOG_ERR("failed to create datapath %s: %s", name, strerror(error));
         free(br);
@@ -816,8 +832,8 @@ bridge_create(const char *name)
     error = ofproto_create(name, &bridge_ofhooks, br, &br->ofproto);
     if (error) {
         VLOG_ERR("failed to create switch %s: %s", name, strerror(error));
-        dpif_delete(&br->dpif);
-        dpif_close(&br->dpif);
+        dpif_delete(br->dpif);
+        dpif_close(br->dpif);
         free(br);
         return NULL;
     }
@@ -834,7 +850,7 @@ bridge_create(const char *name)
 
     list_push_back(&all_bridges, &br->node);
 
-    VLOG_INFO("created bridge %s on dp%u", br->name, dpif_id(&br->dpif));
+    VLOG_INFO("created bridge %s on %s", br->name, dpif_name(br->dpif));
 
     return br;
 }
@@ -849,12 +865,12 @@ bridge_destroy(struct bridge *br)
             port_destroy(br->ports[br->n_ports - 1]);
         }
         list_remove(&br->node);
-        error = dpif_delete(&br->dpif);
+        error = dpif_delete(br->dpif);
         if (error && error != ENOENT) {
-            VLOG_ERR("failed to delete dp%u: %s",
-                     dpif_id(&br->dpif), strerror(error));
+            VLOG_ERR("failed to delete %s: %s",
+                     dpif_name(br->dpif), strerror(error));
         }
-        dpif_close(&br->dpif);
+        dpif_close(br->dpif);
         ofproto_destroy(br->ofproto);
         free(br->controller);
         mac_learning_destroy(br->ml);
@@ -945,9 +961,16 @@ bridge_reconfigure_one(struct bridge *br)
     svec_init(&new_ports);
     cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name);
     svec_sort(&new_ports);
-    if (bridge_get_controller(br) && !svec_contains(&new_ports, br->name)) {
-        svec_add(&new_ports, br->name);
-        svec_sort(&new_ports);
+    if (bridge_get_controller(br)) {
+        char local_name[IF_NAMESIZE];
+        int error;
+
+        error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+                                   local_name, sizeof local_name);
+        if (!error && !svec_contains(&new_ports, local_name)) {
+            svec_add(&new_ports, local_name);
+            svec_sort(&new_ports);
+        }
     }
     if (!svec_is_unique(&new_ports)) {
         VLOG_WARN("bridge %s: %s specified twice as bridge port",
@@ -1074,11 +1097,17 @@ bridge_reconfigure_controller(struct bridge *br)
         int rate_limit, burst_limit;
 
         if (!strcmp(controller, "discover")) {
+            bool update_resolv_conf = true;
+
+            if (cfg_has("%s.update-resolv.conf", pfx)) {
+                update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf",
+                        pfx);
+            }
             ofproto_set_discovery(br->ofproto, true,
                                   cfg_get_string(0, "%s.accept-regex", pfx),
-                                  cfg_get_bool(0, "%s.update-resolv.conf",
-                                               pfx));
+                                  update_resolv_conf);
         } else {
+            char local_name[IF_NAMESIZE];
             struct netdev *netdev;
             bool in_band;
             int error;
@@ -1089,7 +1118,11 @@ bridge_reconfigure_controller(struct bridge *br)
             ofproto_set_discovery(br->ofproto, false, NULL, NULL);
             ofproto_set_in_band(br->ofproto, in_band);
 
-            error = netdev_open(br->name, NETDEV_ETH_TYPE_NONE, &netdev);
+            error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+                                       local_name, sizeof local_name);
+            if (!error) {
+                error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
+            }
             if (!error) {
                 if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) {
                     struct in_addr ip, mask, gateway;
@@ -1129,8 +1162,13 @@ bridge_reconfigure_controller(struct bridge *br)
                              || !strcmp(fail_mode, "open")));
 
         probe = cfg_get_int(0, "%s.inactivity-probe", pfx);
-        ofproto_set_probe_interval(br->ofproto,
-                                   probe ? probe : cfg_get_int(0, "mgmt.inactivity-probe"));
+        if (probe < 5) {
+            probe = cfg_get_int(0, "mgmt.inactivity-probe");
+            if (probe < 5) {
+                probe = 15;
+            }
+        }
+        ofproto_set_probe_interval(br->ofproto, probe);
 
         max_backoff = cfg_get_int(0, "%s.max-backoff", pfx);
         if (!max_backoff) {
@@ -1237,17 +1275,17 @@ bridge_fetch_dp_ifaces(struct bridge *br)
     }
     port_array_clear(&br->ifaces);
 
-    dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+    dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
     for (i = 0; i < n_dpif_ports; i++) {
         struct odp_port *p = &dpif_ports[i];
         struct iface *iface = iface_lookup(br, p->devname);
         if (iface) {
             if (iface->dp_ifidx >= 0) {
-                VLOG_WARN("dp%u reported interface %s twice",
-                          dpif_id(&br->dpif), p->devname);
+                VLOG_WARN("%s reported interface %s twice",
+                          dpif_name(br->dpif), p->devname);
             } else if (iface_from_dp_ifidx(br, p->port)) {
-                VLOG_WARN("dp%u reported interface %"PRIu16" twice",
-                          dpif_id(&br->dpif), p->port);
+                VLOG_WARN("%s reported interface %"PRIu16" twice",
+                          dpif_name(br->dpif), p->port);
             } else {
                 port_array_set(&br->ifaces, p->port, iface);
                 iface->dp_ifidx = p->port;
@@ -1259,11 +1297,16 @@ bridge_fetch_dp_ifaces(struct bridge *br)
 \f
 /* Bridge packet processing functions. */
 
+static int
+bond_hash(const uint8_t mac[ETH_ADDR_LEN])
+{
+    return hash_bytes(mac, ETH_ADDR_LEN, 0) & BOND_MASK;
+}
+
 static struct bond_entry *
 lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN])
 {
-    size_t h = hash_bytes(mac, ETH_ADDR_LEN, 0);
-    return &port->bond_hash[h & BOND_MASK];
+    return &port->bond_hash[bond_hash(mac)];
 }
 
 static int
@@ -1279,7 +1322,7 @@ bond_choose_iface(const struct port *port)
 }
 
 static bool
-choose_output_iface(const struct port *port, const flow_t *flow,
+choose_output_iface(const struct port *port, const uint8_t *dl_src,
                     uint16_t *dp_ifidx, tag_type *tags)
 {
     struct iface *iface;
@@ -1288,7 +1331,7 @@ choose_output_iface(const struct port *port, const flow_t *flow,
     if (port->n_ifaces == 1) {
         iface = port->ifaces[0];
     } else {
-        struct bond_entry *e = lookup_bond_entry(port, flow->dl_src);
+        struct bond_entry *e = lookup_bond_entry(port, dl_src);
         if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces
             || !port->ifaces[e->iface_idx]->enabled) {
             /* XXX select interface properly.  The current interface selection
@@ -1324,6 +1367,10 @@ bond_link_status_update(struct iface *iface, bool carrier)
         iface->delay_expires = LLONG_MAX;
         VLOG_INFO_RL(&rl, "interface %s: will not be %s",
                      iface->name, carrier ? "disabled" : "enabled");
+    } else if (carrier && port->updelay && port->active_iface < 0) {
+        iface->delay_expires = time_msec();
+        VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+                     "other interface is up", iface->name, port->updelay);
     } else {
         int delay = carrier ? port->updelay : port->downdelay;
         iface->delay_expires = time_msec() + delay;
@@ -1354,6 +1401,38 @@ bond_choose_active_iface(struct port *port)
     }
 }
 
+static void
+bond_enable_slave(struct iface *iface, bool enable)
+{
+    struct port *port = iface->port;
+    struct bridge *br = port->bridge;
+
+    iface->delay_expires = LLONG_MAX;
+    if (enable == iface->enabled) {
+        return;
+    }
+
+    iface->enabled = enable;
+    if (!iface->enabled) {
+        VLOG_WARN("interface %s: disabled", iface->name);
+        ofproto_revalidate(br->ofproto, iface->tag);
+        if (iface->port_ifidx == port->active_iface) {
+            ofproto_revalidate(br->ofproto,
+                               port->active_iface_tag);
+            bond_choose_active_iface(port);
+        }
+        bond_send_learning_packets(port);
+    } else {
+        VLOG_WARN("interface %s: enabled", iface->name);
+        if (port->active_iface < 0) {
+            ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
+            bond_choose_active_iface(port);
+            bond_send_learning_packets(port);
+        }
+        iface->tag = tag_create_random();
+    }
+}
+
 static void
 bond_run(struct bridge *br)
 {
@@ -1367,25 +1446,7 @@ bond_run(struct bridge *br)
         for (j = 0; j < port->n_ifaces; j++) {
             struct iface *iface = port->ifaces[j];
             if (time_msec() >= iface->delay_expires) {
-                iface->delay_expires = LLONG_MAX;
-                iface->enabled = !iface->enabled;
-                VLOG_WARN("interface %s: %s",
-                          iface->name,
-                          iface->enabled ? "enabled" : "disabled");
-                if (!iface->enabled) {
-                    ofproto_revalidate(br->ofproto, iface->tag);
-                    if (iface->port_ifidx == port->active_iface) {
-                        ofproto_revalidate(br->ofproto,
-                                           port->active_iface_tag);
-                        bond_choose_active_iface(port);
-                    }
-                } else {
-                    if (port->active_iface < 0) {
-                        ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
-                        bond_choose_active_iface(port);
-                    }
-                    iface->tag = tag_create_random();
-                }
+                bond_enable_slave(iface, !iface->enabled);
             }
         }
     }
@@ -1430,7 +1491,7 @@ set_dst(struct dst *p, const flow_t *flow,
     p->vlan = (out_port->vlan >= 0 ? OFP_VLAN_NONE
               : in_port->vlan >= 0 ? in_port->vlan
               : ntohs(flow->dl_vlan));
-    return choose_output_iface(out_port, flow, &p->dp_ifidx, tags);
+    return choose_output_iface(out_port, flow->dl_src, &p->dp_ifidx, tags);
 }
 
 static void
@@ -1719,12 +1780,32 @@ process_flow(struct bridge *br, const flow_t *flow,
         goto done;
     }
 
-    /* Drop multicast and broadcast packets on inactive bonded interfaces, to
+    /* Multicast (and broadcast) packets on bonds need special attention, to
      * avoid receiving duplicates. */
     if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) {
         *tags |= in_port->active_iface_tag;
         if (in_port->active_iface != in_iface->port_ifidx) {
+            /* Drop all multicast packets on inactive slaves. */
             goto done;
+        } else {
+            /* Drop all multicast packets for which we have learned a different
+             * input port, because we probably sent the packet on one slave
+             * and got it back on the active slave.  Broadcast ARP replies are
+             * an exception to this rule: the host has moved to another
+             * switch. */
+            int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+            if (src_idx != -1 && src_idx != in_port->port_idx) {
+                if (packet) {
+                    if (!is_bcast_arp_reply(flow, packet)) {
+                        goto done;
+                    }
+                } else {
+                    /* No way to know whether it's an ARP reply, because the
+                     * flow entry doesn't include enough information and we
+                     * don't have a packet.  Punt. */
+                    return false;
+                }
+            }
         }
     }
 
@@ -1732,27 +1813,9 @@ process_flow(struct bridge *br, const flow_t *flow,
     out_port = FLOOD_PORT;
     if (br->ml) {
         int out_port_idx;
-        bool may_learn;
-
-        if (!packet) {
-            /* Don't try to learn from revalidation. */
-            may_learn = false;
-        } else if (in_port->n_ifaces > 1) {
-            /* If the packet arrived on a bonded port, don't learn from it
-             * unless we haven't learned any port at all for that address
-             * (because we probably sent the packet on one bonded interface and
-             * got it back on the other).  Broadcast ARP replies are an
-             * exception to this rule: the host has moved to another switch. */
-            int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
-            may_learn = (src_idx < 0
-                         || src_idx == in_port->port_idx
-                         || is_bcast_arp_reply(flow, packet));
-        } else {
-            may_learn = true;
-        }
 
-        /* Learn source MAC. */
-        if (may_learn) {
+        /* Learn source MAC (but don't try to learn from revalidation). */
+        if (packet) {
             tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
                                                   vlan, in_port->port_idx);
             if (rev_tag) {
@@ -1908,6 +1971,8 @@ static struct ofhooks bridge_ofhooks = {
     bridge_account_checkpoint_ofhook_cb,
 };
 \f
+/* Bonding functions. */
+
 /* Statistics for a single interface on a bonded port, used for load-based
  * bond rebalancing.  */
 struct slave_balance {
@@ -2058,7 +2123,6 @@ bond_shift_load(struct slave_balance *from, struct slave_balance *to,
     ofproto_revalidate(port->bridge->ofproto, hash->iface_tag);
     hash->iface_idx = to->iface->port_ifidx;
     hash->iface_tag = tag_create_random();
-
 }
 
 static void
@@ -2167,6 +2231,369 @@ bond_rebalance_port(struct port *port)
         e->tx_bytes /= 2;
     }
 }
+
+/* Sends one "gratuitous learning packet" through bonded 'port' for each entry
+ * in the bridge's MAC learning table that was learned on a port other than
+ * 'port' and for which choose_output_iface() can pick an output interface.
+ *
+ * Each packet is a broadcast Ethernet frame whose source address is the
+ * learned MAC.  It carries an LLC/SNAP header followed by a fixed marker
+ * string and a copy of the MAC, and it is output on the interface that
+ * choose_output_iface() selected, with a set-VLAN action first if the entry's
+ * VLAN is nonzero.  (Presumably this lets neighboring switches re-learn where
+ * those MACs are reachable after the bond's active interface changes --
+ * TODO confirm.)
+ *
+ * Does nothing if 'port' has no interfaces, has no active interface, or its
+ * bridge has no MAC learning table.  Send failures are counted and reported
+ * in a single rate-limited warning. */
+static void
+bond_send_learning_packets(struct port *port)
+{
+    static const char payload[] = "Open vSwitch Bond Failover";
+    struct bridge *br = port->bridge;
+    struct mac_entry *entry;
+    struct ofpbuf packet;
+    int n_packets = 0;
+    int n_errors = 0;
+    int last_error = 0;
+
+    if (!port->n_ifaces || port->active_iface < 0 || !br->ml) {
+        return;
+    }
+
+    ofpbuf_init(&packet, 128);
+    LIST_FOR_EACH (entry, struct mac_entry, lru_node, &br->ml->lrus) {
+        union ofp_action actions[2];
+        union ofp_action *act = actions;
+        struct llc_snap_header *llc_snap;
+        struct eth_header *eth;
+        tag_type tags = 0;
+        uint16_t dp_ifidx;
+        flow_t flow;
+        int send_error;
+
+        /* Nothing to advertise for MACs learned on this very port. */
+        if (entry->port == port->port_idx) {
+            continue;
+        }
+        if (!choose_output_iface(port, entry->mac, &dp_ifidx, &tags)) {
+            continue;
+        }
+
+        /* Build the frame: Ethernet header, LLC/SNAP header, marker string,
+         * then the MAC itself.
+         *
+         * NOTE(review): 'eth' and 'llc_snap' are used after further
+         * ofpbuf_put() calls; presumably the 128-byte initial size keeps the
+         * buffer from being reallocated underneath them -- confirm. */
+        ofpbuf_clear(&packet);
+        eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
+        llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
+        ofpbuf_put(&packet, payload, sizeof payload); /* Includes null byte. */
+        ofpbuf_put(&packet, entry->mac, ETH_ADDR_LEN);
+
+        /* The type/length field carries the length of everything that follows
+         * the Ethernet header. */
+        eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
+        memcpy(eth->eth_src, entry->mac, ETH_ADDR_LEN);
+        memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
+
+        llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
+        llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
+        llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
+        memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
+        llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
+
+        /* Build the action list: optional set-VLAN, then output. */
+        memset(actions, 0, sizeof actions);
+        if (entry->vlan) {
+            act->vlan_vid.type = htons(OFPAT_SET_VLAN_VID);
+            act->vlan_vid.len = htons(sizeof *act);
+            act->vlan_vid.vlan_vid = htons(entry->vlan);
+            act++;
+        }
+        act->output.type = htons(OFPAT_OUTPUT);
+        act->output.len = htons(sizeof *act);
+        act->output.port = htons(odp_port_to_ofp_port(dp_ifidx));
+        act++;
+
+        /* Hand the packet to ofproto, remembering the most recent failure. */
+        n_packets++;
+        flow_extract(&packet, ODPP_NONE, &flow);
+        send_error = ofproto_send_packet(br->ofproto, &flow, actions,
+                                         act - actions, &packet);
+        if (send_error) {
+            last_error = send_error;
+            n_errors++;
+        }
+    }
+    ofpbuf_uninit(&packet);
+
+    if (!n_errors) {
+        VLOG_DBG("bond %s: sent %d gratuitous learning packets",
+                 port->name, n_packets);
+    } else {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
+                     "packets, last error was: %s",
+                     port->name, n_errors, n_packets, strerror(last_error));
+    }
+}
+\f
+/* Bonding unixctl user interface functions. */
+
+/* unixctl handler for "bond/list".
+ *
+ * Replies on 'conn' with status 200 and a tab-separated table: a header line,
+ * then one line per port (across all bridges) that has more than one
+ * interface, giving the bridge name, the port name, and a comma-separated
+ * list of the port's interface names.  'args' is ignored. */
+static void
+bond_unixctl_list(struct unixctl_conn *conn, const char *args UNUSED)
+{
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    const struct bridge *br;
+
+    ds_put_cstr(&reply, "bridge\tbond\tslaves\n");
+
+    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+        size_t port_no;
+
+        for (port_no = 0; port_no < br->n_ports; port_no++) {
+            const struct port *port = br->ports[port_no];
+            size_t slave_no;
+
+            /* Only ports with more than one interface are listed. */
+            if (port->n_ifaces <= 1) {
+                continue;
+            }
+
+            ds_put_format(&reply, "%s\t%s\t", br->name, port->name);
+            for (slave_no = 0; slave_no < port->n_ifaces; slave_no++) {
+                if (slave_no > 0) {
+                    ds_put_cstr(&reply, ", ");
+                }
+                ds_put_cstr(&reply, port->ifaces[slave_no]->name);
+            }
+            ds_put_char(&reply, '\n');
+        }
+    }
+
+    unixctl_command_reply(conn, 200, ds_cstr(&reply));
+    ds_destroy(&reply);
+}
+
+/* Searches every bridge in 'all_bridges' for a port named 'name' that has
+ * more than one interface.  Returns the first such port found, or NULL if
+ * there is none. */
+static struct port *
+bond_find(const char *name)
+{
+    const struct bridge *br;
+
+    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+        size_t idx;
+
+        for (idx = 0; idx < br->n_ports; idx++) {
+            struct port *candidate = br->ports[idx];
+
+            /* A port with a single interface is not treated as a bond. */
+            if (candidate->n_ifaces <= 1) {
+                continue;
+            }
+            if (strcmp(candidate->name, name) == 0) {
+                return candidate;
+            }
+        }
+    }
+    return NULL;
+}
+
+/* unixctl handler for "bond/show".
+ *
+ * 'args' is the name of the bond to describe.  Replies on 'conn' with status
+ * 501 and "no such bond" if bond_find() cannot find it.  Otherwise replies
+ * with status 200 and a report containing:
+ *
+ *   - the port's updelay and downdelay and the time until the bridge's next
+ *     rebalance;
+ *   - for each slave: whether it is enabled, whether it is the active slave,
+ *     and any pending updelay/downdelay expiration;
+ *   - for each slave, every bond hash bucket currently assigned to it with
+ *     its load, and (when the bridge has a MAC learning table) the learned
+ *     MACs that hash to that bucket and would be output on that slave. */
+static void
+bond_unixctl_show(struct unixctl_conn *conn, const char *args)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    const struct port *port;
+    size_t j;
+
+    port = bond_find(args);
+    if (!port) {
+        unixctl_command_reply(conn, 501, "no such bond");
+        return;
+    }
+
+    ds_put_format(&ds, "updelay: %d ms\n", port->updelay);
+    ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay);
+    ds_put_format(&ds, "next rebalance: %lld ms\n",
+                  port->bridge->bond_next_rebalance - time_msec());
+    for (j = 0; j < port->n_ifaces; j++) {
+        const struct iface *iface = port->ifaces[j];
+        struct bond_entry *be;
+
+        /* Basic info. */
+        ds_put_format(&ds, "slave %s: %s\n",
+                      iface->name, iface->enabled ? "enabled" : "disabled");
+        if (j == port->active_iface) {
+            ds_put_cstr(&ds, "\tactive slave\n");
+        }
+        /* LLONG_MAX means that no delay timer is pending for this slave. */
+        if (iface->delay_expires != LLONG_MAX) {
+            ds_put_format(&ds, "\t%s expires in %lld ms\n",
+                          iface->enabled ? "downdelay" : "updelay",
+                          iface->delay_expires - time_msec());
+        }
+
+        /* Hashes. */
+        for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) {
+            int hash = be - port->bond_hash;
+            struct mac_entry *me;
+
+            /* Only report buckets currently assigned to this slave. */
+            if (be->iface_idx != j) {
+                continue;
+            }
+
+            ds_put_format(&ds, "\thash %d: %lld kB load\n",
+                          hash, be->tx_bytes / 1024);
+
+            /* MACs.
+             *
+             * Without a MAC learning table there are no MACs to list, but the
+             * remaining hash buckets of this slave must still be reported, so
+             * skip only the MAC listing rather than leaving the hash loop. */
+            if (!port->bridge->ml) {
+                continue;
+            }
+
+            LIST_FOR_EACH (me, struct mac_entry, lru_node,
+                           &port->bridge->ml->lrus) {
+                uint16_t dp_ifidx;
+                tag_type tags = 0;
+                if (bond_hash(me->mac) == hash
+                    && me->port != port->port_idx
+                    && choose_output_iface(port, me->mac, &dp_ifidx, &tags)
+                    && dp_ifidx == iface->dp_ifidx)
+                {
+                    ds_put_format(&ds, "\t\t"ETH_ADDR_FMT"\n",
+                                  ETH_ADDR_ARGS(me->mac));
+                }
+            }
+        }
+    }
+    unixctl_command_reply(conn, 200, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+
+/* unixctl handler for "bond/migrate BOND HASH SLAVE".
+ *
+ * Reassigns one bond hash bucket of bond BOND to slave SLAVE.  HASH is either
+ * an Ethernet address, which is converted to a bucket with bond_hash(), or a
+ * decimal number, which is masked with BOND_MASK.
+ *
+ * Replies on 'conn' with status 501 and an explanation if an argument is
+ * missing, the bond or slave does not exist, HASH cannot be parsed, or the
+ * slave is disabled.  Otherwise it revalidates flows tagged with the bucket's
+ * old tag, points the bucket at the slave with a fresh tag, and replies with
+ * status 200.
+ *
+ * NOTE(review): strtok_r() writes NUL bytes into the buffer behind 'args_'
+ * even though it is received as const -- confirm that the unixctl caller
+ * always passes a writable buffer. */
+static void
+bond_unixctl_migrate(struct unixctl_conn *conn, const char *args_)
+{
+    uint8_t mac[ETH_ADDR_LEN];
+    struct bond_entry *entry;
+    struct iface *iface;
+    struct port *port;
+    int hash;
+    char *args = (char *) args_;
+    char *save_ptr = NULL;
+    char *bond_s = strtok_r(args, " ", &save_ptr);
+    char *hash_s = strtok_r(NULL, " ", &save_ptr);
+    char *slave_s = strtok_r(NULL, " ", &save_ptr);
+
+    /* A missing third token implies that the command line was too short. */
+    if (!slave_s) {
+        unixctl_command_reply(conn, 501,
+                              "usage: bond/migrate BOND HASH SLAVE");
+        return;
+    }
+
+    port = bond_find(bond_s);
+    if (!port) {
+        unixctl_command_reply(conn, 501, "no such bond");
+        return;
+    }
+
+    /* Accept either a MAC address or an all-digits bucket number. */
+    if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+        == ETH_ADDR_SCAN_COUNT) {
+        hash = bond_hash(mac);
+    } else if (hash_s[strspn(hash_s, "0123456789")] == '\0') {
+        hash = atoi(hash_s) & BOND_MASK;
+    } else {
+        unixctl_command_reply(conn, 501, "bad hash");
+        return;
+    }
+
+    iface = port_lookup_iface(port, slave_s);
+    if (!iface) {
+        unixctl_command_reply(conn, 501, "no such slave");
+        return;
+    }
+    if (!iface->enabled) {
+        unixctl_command_reply(conn, 501, "cannot migrate to disabled slave");
+        return;
+    }
+
+    /* Revalidate flows carrying the old tag, then retarget the bucket. */
+    entry = &port->bond_hash[hash];
+    ofproto_revalidate(port->bridge->ofproto, entry->iface_tag);
+    entry->iface_idx = iface->port_ifidx;
+    entry->iface_tag = tag_create_random();
+    unixctl_command_reply(conn, 200, "migrated");
+}
+
+/* unixctl handler for "bond/set-active-slave BOND SLAVE".
+ *
+ * Makes SLAVE the active interface of bond BOND.  Replies on 'conn' with
+ * status 501 and an explanation if an argument is missing, the bond or slave
+ * does not exist, or the slave is disabled.  If SLAVE is already active,
+ * replies with status 200 and "no change".  Otherwise it revalidates flows
+ * tagged with the old active-interface tag, switches the active interface,
+ * sends gratuitous learning packets, and replies with status 200 and "done".
+ *
+ * NOTE(review): strtok_r() writes NUL bytes into the buffer behind 'args_'
+ * even though it is received as const -- confirm that the unixctl caller
+ * always passes a writable buffer. */
+static void
+bond_unixctl_set_active_slave(struct unixctl_conn *conn, const char *args_)
+{
+    struct iface *iface;
+    struct port *port;
+    char *args = (char *) args_;
+    char *save_ptr = NULL;
+    char *bond_s = strtok_r(args, " ", &save_ptr);
+    char *slave_s = strtok_r(NULL, " ", &save_ptr);
+
+    /* A missing second token implies that the command line was too short. */
+    if (!slave_s) {
+        unixctl_command_reply(conn, 501,
+                              "usage: bond/set-active-slave BOND SLAVE");
+        return;
+    }
+
+    port = bond_find(bond_s);
+    if (!port) {
+        unixctl_command_reply(conn, 501, "no such bond");
+        return;
+    }
+
+    iface = port_lookup_iface(port, slave_s);
+    if (!iface) {
+        unixctl_command_reply(conn, 501, "no such slave");
+        return;
+    }
+    if (!iface->enabled) {
+        unixctl_command_reply(conn, 501, "cannot make disabled slave active");
+        return;
+    }
+
+    /* Nothing to do if the requested slave is already the active one. */
+    if (port->active_iface == iface->port_ifidx) {
+        unixctl_command_reply(conn, 200, "no change");
+        return;
+    }
+
+    ofproto_revalidate(port->bridge->ofproto, port->active_iface_tag);
+    port->active_iface = iface->port_ifidx;
+    port->active_iface_tag = tag_create_random();
+    VLOG_INFO("port %s: active interface is now %s",
+              port->name, iface->name);
+    bond_send_learning_packets(port);
+    unixctl_command_reply(conn, 200, "done");
+}
+
+/* Common implementation of the "bond/enable-slave" and "bond/disable-slave"
+ * unixctl commands.
+ *
+ * 'args_' holds "BOND SLAVE".  Enables (if 'enable' is true) or disables (if
+ * false) that slave through bond_enable_slave().  Replies on 'conn' with
+ * status 501 and an explanation if an argument is missing or the bond or
+ * slave does not exist; otherwise replies with status 200 and "enabled" or
+ * "disabled".
+ *
+ * NOTE(review): strtok_r() writes NUL bytes into the buffer behind 'args_'
+ * even though it is received as const -- confirm that the unixctl caller
+ * always passes a writable buffer. */
+static void
+enable_slave(struct unixctl_conn *conn, const char *args_, bool enable)
+{
+    char *args = (char *) args_;
+    char *save_ptr = NULL;
+    char *bond_s, *slave_s;
+    struct port *port;
+    struct iface *iface;
+
+    bond_s = strtok_r(args, " ", &save_ptr);
+    slave_s = strtok_r(NULL, " ", &save_ptr);
+    if (!slave_s) {
+        unixctl_command_reply(conn, 501,
+                              "usage: bond/enable/disable-slave BOND SLAVE");
+        return;
+    }
+
+    port = bond_find(bond_s);
+    if (!port) {
+        unixctl_command_reply(conn, 501, "no such bond");
+        return;
+    }
+
+    iface = port_lookup_iface(port, slave_s);
+    if (!iface) {
+        unixctl_command_reply(conn, 501, "no such slave");
+        return;
+    }
+
+    bond_enable_slave(iface, enable);
+    /* Success is reported with status 200, like the other bond commands;
+     * 501 is reserved for the error replies above. */
+    unixctl_command_reply(conn, 200, enable ? "enabled" : "disabled");
+}
+
+/* unixctl handler for "bond/enable-slave": forwards 'args' to enable_slave()
+ * with 'enable' set to true. */
+static void
+bond_unixctl_enable_slave(struct unixctl_conn *conn, const char *args)
+{
+    enable_slave(conn, args, true);
+}
+
+/* unixctl handler for "bond/disable-slave": forwards 'args' to enable_slave()
+ * with 'enable' set to false. */
+static void
+bond_unixctl_disable_slave(struct unixctl_conn *conn, const char *args)
+{
+    enable_slave(conn, args, false);
+}
+
+/* Registers the "bond/..." unixctl commands with their handlers. */
+static void
+bond_init(void)
+{
+    /* Command names paired with their handlers, in registration order. */
+    static const struct {
+        const char *name;
+        void (*handler)(struct unixctl_conn *, const char *);
+    } commands[] = {
+        { "bond/list", bond_unixctl_list },
+        { "bond/show", bond_unixctl_show },
+        { "bond/migrate", bond_unixctl_migrate },
+        { "bond/set-active-slave", bond_unixctl_set_active_slave },
+        { "bond/enable-slave", bond_unixctl_enable_slave },
+        { "bond/disable-slave", bond_unixctl_disable_slave },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof commands / sizeof commands[0]; i++) {
+        unixctl_command_register(commands[i].name, commands[i].handler);
+    }
+}
 \f
 /* Port functions. */
 
@@ -2369,6 +2796,20 @@ port_lookup(const struct bridge *br, const char *name)
     return NULL;
 }
 
+/* Returns the interface of 'port' whose name is exactly 'name', or NULL if
+ * 'port' has no such interface.  If several interfaces share the name, the
+ * one with the lowest index in 'port->ifaces' is returned. */
+static struct iface *
+port_lookup_iface(const struct port *port, const char *name)
+{
+    size_t idx = 0;
+
+    while (idx < port->n_ifaces) {
+        struct iface *candidate = port->ifaces[idx++];
+
+        if (strcmp(candidate->name, name) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
 static void
 port_update_bonding(struct port *port)
 {
@@ -2517,6 +2958,7 @@ iface_destroy(struct iface *iface)
         if (del_active) {
             ofproto_revalidate(port->bridge->ofproto, port->active_iface_tag);
             bond_choose_active_iface(port);
+            bond_send_learning_packets(port);
         }
 
         port_update_bonding(port);