Rename "secchan" to "ofproto" (library) and "ovs-openflowd" (program).
[sliver-openvswitch.git] / vswitchd / bridge.c
index b266c10..ab55658 100644 (file)
@@ -1,28 +1,16 @@
 /* Copyright (c) 2008, 2009 Nicira Networks
- * 
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
  *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
  *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * In addition, as a special exception, Nicira Networks gives permission
- * to link the code of its release of vswitchd with the OpenSSL project's
- * "OpenSSL" library (or with modified versions of it that use the same
- * license as the "OpenSSL" library), and distribute the linked
- * executables.  You must obey the GNU General Public License in all
- * respects for all of the code used other than "OpenSSL".  If you modify
- * this file, you may extend this exception to your version of the file,
- * but you are not obligated to do so.  If you do not wish to do so,
- * delete this exception statement from your version.
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #include <config.h>
@@ -39,6 +27,7 @@
 #include <strings.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
+#include <sys/types.h>
 #include <unistd.h>
 #include "bitmap.h"
 #include "cfg.h"
 #include "odp-util.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
+#include "ofproto/ofproto.h"
 #include "packets.h"
 #include "poll-loop.h"
 #include "port-array.h"
 #include "proc-net-compat.h"
 #include "process.h"
-#include "secchan/ofproto.h"
 #include "socket-util.h"
 #include "stp.h"
 #include "svec.h"
@@ -169,7 +158,7 @@ struct bridge {
     struct ofproto *ofproto;    /* OpenFlow switch. */
 
     /* Kernel datapath information. */
-    struct dpif dpif;           /* Kernel datapath. */
+    struct dpif *dpif;          /* Datapath. */
     struct port_array ifaces;   /* Indexed by kernel datapath port number. */
 
     /* Bridge ports. */
@@ -270,8 +259,8 @@ bridge_get_ifaces(struct svec *svec)
             for (j = 0; j < port->n_ifaces; j++) {
                 struct iface *iface = port->ifaces[j];
                 if (iface->dp_ifidx < 0) {
-                    VLOG_ERR("%s interface not in dp%u, ignoring",
-                             iface->name, dpif_id(&br->dpif));
+                    VLOG_ERR("%s interface not in datapath %s, ignoring",
+                             iface->name, dpif_name(br->dpif));
                 } else {
                     if (iface->dp_ifidx != ODPP_LOCAL) {
                         svec_add(svec, iface->name);
@@ -286,30 +275,35 @@ bridge_get_ifaces(struct svec *svec)
 void
 bridge_init(void)
 {
-    int retval;
-    int i;
-
-    bond_init();
+    struct svec dpif_names;
+    size_t i;
 
-    for (i = 0; i < DP_MAX; i++) {
-        struct dpif dpif;
-        char devname[16];
+    dp_enumerate(&dpif_names);
+    for (i = 0; i < dpif_names.n; i++) {
+        const char *dpif_name = dpif_names.names[i];
+        struct dpif *dpif;
+        int retval;
 
-        sprintf(devname, "dp%d", i);
-        retval = dpif_open(devname, &dpif);
+        retval = dpif_open(dpif_name, &dpif);
         if (!retval) {
-            char dpif_name[IF_NAMESIZE];
-            if (dpif_get_name(&dpif, dpif_name, sizeof dpif_name)
-                || !cfg_has("bridge.%s.port", dpif_name)) {
-                dpif_delete(&dpif);
+            struct svec all_names;
+            size_t j;
+
+            svec_init(&all_names);
+            dpif_get_all_names(dpif, &all_names);
+            for (j = 0; j < all_names.n; j++) {
+                if (cfg_has("bridge.%s.port", all_names.names[j])) {
+                    goto found;
+                }
             }
-            dpif_close(&dpif);
-        } else if (retval != ENODEV) {
-            VLOG_ERR("failed to delete datapath dp%d: %s",
-                     i, strerror(retval));
+            dpif_delete(dpif);
+        found:
+            svec_destroy(&all_names);
+            dpif_close(dpif);
         }
     }
 
+    bond_init();
     bridge_reconfigure();
 }
 
@@ -335,6 +329,7 @@ bridge_configure_ssl(void)
     static char *private_key_file;
     static char *certificate_file;
     static char *cacert_file;
+    struct stat s;
 
     if (config_string_change("ssl.private-key", &private_key_file)) {
         vconn_ssl_set_private_key_file(private_key_file);
@@ -344,7 +339,13 @@ bridge_configure_ssl(void)
         vconn_ssl_set_certificate_file(certificate_file);
     }
 
-    if (config_string_change("ssl.ca-cert", &cacert_file)) {
+    /* We assume that if the CA cert file has been removed, even though the
+     * filename hasn't changed, we want to move back into bootstrapping
+     * mode.  This opens a small security hole, because the old certificate
+     * will still be trusted until vSwitch is restarted.  We may want to
+     * address this in vconn's SSL library. */
+    if (config_string_change("ssl.ca-cert", &cacert_file)
+        || (cacert_file && stat(cacert_file, &s) && errno == ENOENT)) {
         vconn_ssl_set_ca_cert_file(cacert_file,
                                    cfg_get_bool(0, "ssl.bootstrap-ca-cert"));
     }
@@ -354,33 +355,19 @@ bridge_configure_ssl(void)
 void
 bridge_reconfigure(void)
 {
-    struct svec old_br, new_br, raw_new_br;
+    struct svec old_br, new_br;
     struct bridge *br, *next;
     size_t i, j;
 
     COVERAGE_INC(bridge_reconfigure);
 
-    /* Collect old bridges. */
+    /* Collect old and new bridges. */
     svec_init(&old_br);
+    svec_init(&new_br);
     LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
         svec_add(&old_br, br->name);
     }
-
-    /* Collect new bridges. */
-    svec_init(&raw_new_br);
-    cfg_get_subsections(&raw_new_br, "bridge");
-    svec_init(&new_br);
-    for (i = 0; i < raw_new_br.n; i++) {
-        const char *name = raw_new_br.names[i];
-        if ((!strncmp(name, "dp", 2) && isdigit(name[2])) ||
-            (!strncmp(name, "nl:", 3) && isdigit(name[3]))) {
-            VLOG_ERR("%s is not a valid bridge name (bridges may not be "
-                     "named \"dp\" or \"nl:\" followed by a digit)", name);
-        } else {
-            svec_add(&new_br, name);
-        }
-    }
-    svec_destroy(&raw_new_br);
+    cfg_get_subsections(&new_br, "bridge");
 
     /* Get rid of deleted bridges and add new bridges. */
     svec_sort(&old_br);
@@ -421,16 +408,17 @@ bridge_reconfigure(void)
         size_t n_dpif_ports;
         struct svec want_ifaces;
 
-        dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+        dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
         bridge_get_all_ifaces(br, &want_ifaces);
         for (i = 0; i < n_dpif_ports; i++) {
             const struct odp_port *p = &dpif_ports[i];
             if (!svec_contains(&want_ifaces, p->devname)
                 && strcmp(p->devname, br->name)) {
-                int retval = dpif_port_del(&br->dpif, p->port);
+                int retval = dpif_port_del(br->dpif, p->port);
                 if (retval) {
-                    VLOG_ERR("failed to remove %s interface from dp%u: %s",
-                             p->devname, dpif_id(&br->dpif), strerror(retval));
+                    VLOG_ERR("failed to remove %s interface from %s: %s",
+                             p->devname, dpif_name(br->dpif),
+                             strerror(retval));
                 }
             }
         }
@@ -441,9 +429,8 @@ bridge_reconfigure(void)
         struct odp_port *dpif_ports;
         size_t n_dpif_ports;
         struct svec cur_ifaces, want_ifaces, add_ifaces;
-        int next_port_no;
 
-        dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+        dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
         svec_init(&cur_ifaces);
         for (i = 0; i < n_dpif_ports; i++) {
             svec_add(&cur_ifaces, dpif_ports[i].devname);
@@ -453,28 +440,20 @@ bridge_reconfigure(void)
         bridge_get_all_ifaces(br, &want_ifaces);
         svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL);
 
-        next_port_no = 1;
         for (i = 0; i < add_ifaces.n; i++) {
             const char *if_name = add_ifaces.names[i];
-            for (;;) {
-                int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
-                int error = dpif_port_add(&br->dpif, if_name, next_port_no++,
-                                          internal ? ODP_PORT_INTERNAL : 0);
-                if (error != EEXIST) {
-                    if (next_port_no >= 256) {
-                        VLOG_ERR("ran out of valid port numbers on dp%u",
-                                 dpif_id(&br->dpif));
-                        goto out;
-                    }
-                    if (error) {
-                        VLOG_ERR("failed to add %s interface to dp%u: %s",
-                                 if_name, dpif_id(&br->dpif), strerror(error));
-                    }
-                    break;
-                }
+            int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
+            int flags = internal ? ODP_PORT_INTERNAL : 0;
+            int error = dpif_port_add(br->dpif, if_name, flags, NULL);
+            if (error == EXFULL) {
+                VLOG_ERR("ran out of valid port numbers on %s",
+                         dpif_name(br->dpif));
+                break;
+            } else if (error) {
+                VLOG_ERR("failed to add %s interface to %s: %s",
+                         if_name, dpif_name(br->dpif), strerror(error));
             }
         }
-    out:
         svec_destroy(&cur_ifaces);
         svec_destroy(&want_ifaces);
         svec_destroy(&add_ifaces);
@@ -484,8 +463,7 @@ bridge_reconfigure(void)
         uint64_t dpid;
         struct iface *local_iface = NULL;
         const char *devname;
-        uint8_t engine_type = br->dpif.minor;
-        uint8_t engine_id = br->dpif.minor;
+        uint8_t engine_type, engine_id;
         bool add_id_to_iface = false;
         struct svec nf_hosts;
 
@@ -496,15 +474,16 @@ bridge_reconfigure(void)
             for (j = 0; j < port->n_ifaces; ) {
                 struct iface *iface = port->ifaces[j];
                 if (iface->dp_ifidx < 0) {
-                    VLOG_ERR("%s interface not in dp%u, dropping",
-                             iface->name, dpif_id(&br->dpif));
+                    VLOG_ERR("%s interface not in %s, dropping",
+                             iface->name, dpif_name(br->dpif));
                     iface_destroy(iface);
                 } else {
                     if (iface->dp_ifidx == ODPP_LOCAL) {
                         local_iface = iface;
                     }
-                    VLOG_DBG("dp%u has interface %s on port %d",
-                             dpif_id(&br->dpif), iface->name, iface->dp_ifidx);
+                    VLOG_DBG("%s has interface %s on port %d",
+                             dpif_name(br->dpif),
+                             iface->name, iface->dp_ifidx);
                     j++;
                 }
             }
@@ -532,6 +511,7 @@ bridge_reconfigure(void)
         ofproto_set_datapath_id(br->ofproto, dpid);
 
         /* Set NetFlow configuration on this bridge. */
+        dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id);
         if (cfg_has("netflow.%s.engine-type", br->name)) {
             engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
                     br->name);
@@ -732,10 +712,10 @@ bridge_pick_datapath_id(struct bridge *br,
 static uint64_t
 dpid_from_hash(const void *data, size_t n)
 {
-    uint8_t hash[SHA1HashSize];
+    uint8_t hash[SHA1_DIGEST_SIZE];
 
     BUILD_ASSERT_DECL(sizeof hash >= ETH_ADDR_LEN);
-    SHA1Bytes(data, n, hash);
+    sha1_bytes(data, n, hash);
     eth_addr_mark_random(hash);
     return eth_addr_to_uint64(hash);
 }
@@ -804,7 +784,7 @@ bridge_create(const char *name)
     br = xcalloc(1, sizeof *br);
 
     error = dpif_create(name, &br->dpif);
-    if (error == EEXIST) {
+    if (error == EEXIST || error == EBUSY) {
         error = dpif_open(name, &br->dpif);
         if (error) {
             VLOG_ERR("datapath %s already exists but cannot be opened: %s",
@@ -812,7 +792,7 @@ bridge_create(const char *name)
             free(br);
             return NULL;
         }
-        dpif_flow_flush(&br->dpif);
+        dpif_flow_flush(br->dpif);
     } else if (error) {
         VLOG_ERR("failed to create datapath %s: %s", name, strerror(error));
         free(br);
@@ -822,8 +802,8 @@ bridge_create(const char *name)
     error = ofproto_create(name, &bridge_ofhooks, br, &br->ofproto);
     if (error) {
         VLOG_ERR("failed to create switch %s: %s", name, strerror(error));
-        dpif_delete(&br->dpif);
-        dpif_close(&br->dpif);
+        dpif_delete(br->dpif);
+        dpif_close(br->dpif);
         free(br);
         return NULL;
     }
@@ -840,7 +820,7 @@ bridge_create(const char *name)
 
     list_push_back(&all_bridges, &br->node);
 
-    VLOG_INFO("created bridge %s on dp%u", br->name, dpif_id(&br->dpif));
+    VLOG_INFO("created bridge %s on %s", br->name, dpif_name(br->dpif));
 
     return br;
 }
@@ -855,12 +835,12 @@ bridge_destroy(struct bridge *br)
             port_destroy(br->ports[br->n_ports - 1]);
         }
         list_remove(&br->node);
-        error = dpif_delete(&br->dpif);
+        error = dpif_delete(br->dpif);
         if (error && error != ENOENT) {
-            VLOG_ERR("failed to delete dp%u: %s",
-                     dpif_id(&br->dpif), strerror(error));
+            VLOG_ERR("failed to delete %s: %s",
+                     dpif_name(br->dpif), strerror(error));
         }
-        dpif_close(&br->dpif);
+        dpif_close(br->dpif);
         ofproto_destroy(br->ofproto);
         free(br->controller);
         mac_learning_destroy(br->ml);
@@ -951,9 +931,16 @@ bridge_reconfigure_one(struct bridge *br)
     svec_init(&new_ports);
     cfg_get_all_keys(&new_ports, "bridge.%s.port", br->name);
     svec_sort(&new_ports);
-    if (bridge_get_controller(br) && !svec_contains(&new_ports, br->name)) {
-        svec_add(&new_ports, br->name);
-        svec_sort(&new_ports);
+    if (bridge_get_controller(br)) {
+        char local_name[IF_NAMESIZE];
+        int error;
+
+        error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+                                   local_name, sizeof local_name);
+        if (!error && !svec_contains(&new_ports, local_name)) {
+            svec_add(&new_ports, local_name);
+            svec_sort(&new_ports);
+        }
     }
     if (!svec_is_unique(&new_ports)) {
         VLOG_WARN("bridge %s: %s specified twice as bridge port",
@@ -1085,6 +1072,7 @@ bridge_reconfigure_controller(struct bridge *br)
                                   cfg_get_bool(0, "%s.update-resolv.conf",
                                                pfx));
         } else {
+            char local_name[IF_NAMESIZE];
             struct netdev *netdev;
             bool in_band;
             int error;
@@ -1095,7 +1083,11 @@ bridge_reconfigure_controller(struct bridge *br)
             ofproto_set_discovery(br->ofproto, false, NULL, NULL);
             ofproto_set_in_band(br->ofproto, in_band);
 
-            error = netdev_open(br->name, NETDEV_ETH_TYPE_NONE, &netdev);
+            error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
+                                       local_name, sizeof local_name);
+            if (!error) {
+                error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
+            }
             if (!error) {
                 if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) {
                     struct in_addr ip, mask, gateway;
@@ -1135,8 +1127,13 @@ bridge_reconfigure_controller(struct bridge *br)
                              || !strcmp(fail_mode, "open")));
 
         probe = cfg_get_int(0, "%s.inactivity-probe", pfx);
-        ofproto_set_probe_interval(br->ofproto,
-                                   probe ? probe : cfg_get_int(0, "mgmt.inactivity-probe"));
+        if (probe < 5) {
+            probe = cfg_get_int(0, "mgmt.inactivity-probe");
+            if (probe < 5) {
+                probe = 15;
+            }
+        }
+        ofproto_set_probe_interval(br->ofproto, probe);
 
         max_backoff = cfg_get_int(0, "%s.max-backoff", pfx);
         if (!max_backoff) {
@@ -1243,17 +1240,17 @@ bridge_fetch_dp_ifaces(struct bridge *br)
     }
     port_array_clear(&br->ifaces);
 
-    dpif_port_list(&br->dpif, &dpif_ports, &n_dpif_ports);
+    dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
     for (i = 0; i < n_dpif_ports; i++) {
         struct odp_port *p = &dpif_ports[i];
         struct iface *iface = iface_lookup(br, p->devname);
         if (iface) {
             if (iface->dp_ifidx >= 0) {
-                VLOG_WARN("dp%u reported interface %s twice",
-                          dpif_id(&br->dpif), p->devname);
+                VLOG_WARN("%s reported interface %s twice",
+                          dpif_name(br->dpif), p->devname);
             } else if (iface_from_dp_ifidx(br, p->port)) {
-                VLOG_WARN("dp%u reported interface %"PRIu16" twice",
-                          dpif_id(&br->dpif), p->port);
+                VLOG_WARN("%s reported interface %"PRIu16" twice",
+                          dpif_name(br->dpif), p->port);
             } else {
                 port_array_set(&br->ifaces, p->port, iface);
                 iface->dp_ifidx = p->port;
@@ -1744,12 +1741,32 @@ process_flow(struct bridge *br, const flow_t *flow,
         goto done;
     }
 
-    /* Drop multicast and broadcast packets on inactive bonded interfaces, to
+    /* Multicast (and broadcast) packets on bonds need special attention, to
      * avoid receiving duplicates. */
     if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) {
         *tags |= in_port->active_iface_tag;
         if (in_port->active_iface != in_iface->port_ifidx) {
+            /* Drop all multicast packets on inactive slaves. */
             goto done;
+        } else {
+            /* Drop all multicast packets for which we have learned a different
+             * input port, because we probably sent the packet on one slave
+             * and got it back on the active slave.  Broadcast ARP replies are
+             * an exception to this rule: the host has moved to another
+             * switch. */
+            int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+            if (src_idx != -1 && src_idx != in_port->port_idx) {
+                if (packet) {
+                    if (!is_bcast_arp_reply(flow, packet)) {
+                        goto done;
+                    }
+                } else {
+                    /* No way to know whether it's an ARP reply, because the
+                     * flow entry doesn't include enough information and we
+                     * don't have a packet.  Punt. */
+                    return false;
+                }
+            }
         }
     }
 
@@ -1757,27 +1774,9 @@ process_flow(struct bridge *br, const flow_t *flow,
     out_port = FLOOD_PORT;
     if (br->ml) {
         int out_port_idx;
-        bool may_learn;
-
-        if (!packet) {
-            /* Don't try to learn from revalidation. */
-            may_learn = false;
-        } else if (in_port->n_ifaces > 1) {
-            /* If the packet arrived on a bonded port, don't learn from it
-             * unless we haven't learned any port at all for that address
-             * (because we probably sent the packet on one bonded interface and
-             * got it back on the other).  Broadcast ARP replies are an
-             * exception to this rule: the host has moved to another switch. */
-            int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
-            may_learn = (src_idx < 0
-                         || src_idx == in_port->port_idx
-                         || is_bcast_arp_reply(flow, packet));
-        } else {
-            may_learn = true;
-        }
 
-        /* Learn source MAC. */
-        if (may_learn) {
+        /* Learn source MAC (but don't try to learn from revalidation). */
+        if (packet) {
             tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
                                                   vlan, in_port->port_idx);
             if (rev_tag) {