ofproto: Add support for master/slave controller coordination.
[sliver-openvswitch.git] / vswitchd / bridge.c
index 187115c..cf02122 100644 (file)
@@ -137,6 +137,7 @@ struct port {
     int updelay, downdelay;     /* Delay before iface goes up/down, in ms. */
     bool bond_compat_is_stale;  /* Need to call port_update_bond_compat()? */
     bool bond_fake_iface;       /* Fake a bond interface for legacy compat? */
+    long bond_next_fake_iface_update; /* Next update to fake bond stats. */
     int bond_rebalance_interval; /* Interval between rebalances, in ms. */
     long long int bond_next_rebalance; /* Next rebalancing time. */
 
@@ -157,11 +158,6 @@ struct bridge {
     bool sent_config_request;   /* Successfully sent config request? */
     uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
 
-    /* Support for remote controllers. */
-    char *controller;           /* NULL if there is no remote controller;
-                                 * "discover" to do controller discovery;
-                                 * otherwise a vconn name. */
-
     /* OpenFlow switch processing. */
     struct ofproto *ofproto;    /* OpenFlow switch. */
 
@@ -207,9 +203,9 @@ static void bridge_destroy(struct bridge *);
 static struct bridge *bridge_lookup(const char *name);
 static unixctl_cb_func bridge_unixctl_dump_flows;
 static int bridge_run_one(struct bridge *);
-static const struct ovsrec_controller *bridge_get_controller(
-                      const struct ovsrec_open_vswitch *ovs_cfg,
-                      const struct bridge *br);
+static size_t bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg,
+                                     const struct bridge *br,
+                                     struct ovsrec_controller ***controllersp);
 static void bridge_reconfigure_one(const struct ovsrec_open_vswitch *,
                                    struct bridge *);
 static void bridge_reconfigure_controller(const struct ovsrec_open_vswitch *,
@@ -744,8 +740,10 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         /* Set sFlow configuration on this bridge. */
         if (br->cfg->sflow) {
             const struct ovsrec_sflow *sflow_cfg = br->cfg->sflow;
-            const struct ovsrec_controller *ctrl;
+            struct ovsrec_controller **controllers;
             struct ofproto_sflow_options oso;
+            size_t n_controllers;
+            size_t i;
 
             memset(&oso, 0, sizeof oso);
 
@@ -770,8 +768,14 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
             oso.sub_id = sflow_bridge_number++;
             oso.agent_device = sflow_cfg->agent;
 
-            ctrl = bridge_get_controller(ovs_cfg, br);
-            oso.control_ip = ctrl ? ctrl->local_ip : NULL;
+            oso.control_ip = NULL;
+            n_controllers = bridge_get_controllers(ovs_cfg, br, &controllers);
+            for (i = 0; i < n_controllers; i++) {
+                if (controllers[i]->local_ip) {
+                    oso.control_ip = controllers[i]->local_ip;
+                    break;
+                }
+            }
             ofproto_set_sflow(br->ofproto, &oso);
 
             svec_destroy(&oso.targets);
@@ -1051,7 +1055,7 @@ bridge_wait(void)
 
     LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
         ofproto_wait(br->ofproto);
-        if (br->controller) {
+        if (ofproto_has_controller(br->ofproto)) {
             continue;
         }
 
@@ -1181,7 +1185,6 @@ bridge_destroy(struct bridge *br)
         }
         dpif_close(br->dpif);
         ofproto_destroy(br->ofproto);
-        free(br->controller);
         mac_learning_destroy(br->ml);
         port_array_destroy(&br->ifaces);
         free(br->ports);
@@ -1257,21 +1260,31 @@ bridge_run_one(struct bridge *br)
     return error;
 }
 
-static const struct ovsrec_controller *
-bridge_get_controller(const struct ovsrec_open_vswitch *ovs_cfg,
-                      const struct bridge *br)
+static size_t
+bridge_get_controllers(const struct ovsrec_open_vswitch *ovs_cfg,
+                       const struct bridge *br,
+                       struct ovsrec_controller ***controllersp)
 {
-    const struct ovsrec_controller *controller;
+    struct ovsrec_controller **controllers;
+    size_t n_controllers;
 
-    controller = (br->cfg->controller ? br->cfg->controller
-                  : ovs_cfg->controller ? ovs_cfg->controller
-                  : NULL);
+    if (br->cfg->n_controller) {
+        controllers = br->cfg->controller;
+        n_controllers = br->cfg->n_controller;
+    } else {
+        controllers = ovs_cfg->controller;
+        n_controllers = ovs_cfg->n_controller;
+    }
 
-    if (controller && !strcmp(controller->target, "none")) {
-        return NULL;
+    if (n_controllers == 1 && !strcmp(controllers[0]->target, "none")) {
+        controllers = NULL;
+        n_controllers = 0;
     }
 
-    return controller;
+    if (controllersp) {
+        *controllersp = controllers;
+    }
+    return n_controllers;
 }
 
 static bool
@@ -1390,7 +1403,7 @@ bridge_reconfigure_one(const struct ovsrec_open_vswitch *ovs_cfg,
      * user didn't specify one.
      *
      * XXX perhaps we should synthesize a port ourselves in this case. */
-    if (bridge_get_controller(ovs_cfg, br)) {
+    if (bridge_get_controllers(ovs_cfg, br, NULL)) {
         char local_name[IF_NAMESIZE];
         int error;
 
@@ -1505,84 +1518,21 @@ static void
 bridge_reconfigure_controller(const struct ovsrec_open_vswitch *ovs_cfg,
                               struct bridge *br)
 {
-    const struct ovsrec_controller *c;
+    struct ovsrec_controller **controllers;
+    size_t n_controllers;
 
-    c = bridge_get_controller(ovs_cfg, br);
-    if ((br->controller != NULL) != (c != NULL)) {
+    n_controllers = bridge_get_controllers(ovs_cfg, br, &controllers);
+    if (ofproto_has_controller(br->ofproto) != (n_controllers != 0)) {
         ofproto_flush_flows(br->ofproto);
     }
-    free(br->controller);
-    br->controller = c ? xstrdup(c->target) : NULL;
-
-    if (c) {
-        int max_backoff, probe;
-        int rate_limit, burst_limit;
-
-        if (!strcmp(c->target, "discover")) {
-            ofproto_set_discovery(br->ofproto, true,
-                                  c->discover_accept_regex,
-                                  c->discover_update_resolv_conf);
-        } else {
-            struct iface *local_iface;
-            struct in_addr ip;
-            bool in_band;
-
-            in_band = (!c->connection_mode
-                       || !strcmp(c->connection_mode, "out-of-band"));
-            ofproto_set_discovery(br->ofproto, false, NULL, NULL);
-            ofproto_set_in_band(br->ofproto, in_band);
-
-            local_iface = bridge_get_local_iface(br);
-            if (local_iface && c->local_ip && inet_aton(c->local_ip, &ip)) {
-                struct netdev *netdev = local_iface->netdev;
-                struct in_addr mask, gateway;
-
-                if (!c->local_netmask || !inet_aton(c->local_netmask, &mask)) {
-                    mask.s_addr = 0;
-                }
-                if (!c->local_gateway
-                    || !inet_aton(c->local_gateway, &gateway)) {
-                    gateway.s_addr = 0;
-                }
 
-                netdev_turn_flags_on(netdev, NETDEV_UP, true);
-                if (!mask.s_addr) {
-                    mask.s_addr = guess_netmask(ip.s_addr);
-                }
-                if (!netdev_set_in4(netdev, ip, mask)) {
-                    VLOG_INFO("bridge %s: configured IP address "IP_FMT", "
-                              "netmask "IP_FMT,
-                              br->name, IP_ARGS(&ip.s_addr),
-                              IP_ARGS(&mask.s_addr));
-                }
-
-                if (gateway.s_addr) {
-                    if (!netdev_add_router(netdev, gateway)) {
-                        VLOG_INFO("bridge %s: configured gateway "IP_FMT,
-                                  br->name, IP_ARGS(&gateway.s_addr));
-                    }
-                }
-            }
-        }
-
-        ofproto_set_failure(br->ofproto,
-                            (!c->fail_mode
-                             || !strcmp(c->fail_mode, "standalone")
-                             || !strcmp(c->fail_mode, "open")));
-
-        probe = c->inactivity_probe ? *c->inactivity_probe / 1000 : 5;
-        ofproto_set_probe_interval(br->ofproto, probe);
-
-        max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8;
-        ofproto_set_max_backoff(br->ofproto, max_backoff);
-
-        rate_limit = c->controller_rate_limit ? *c->controller_rate_limit : 0;
-        burst_limit = c->controller_burst_limit ? *c->controller_burst_limit : 0;
-        ofproto_set_rate_limit(br->ofproto, rate_limit, burst_limit);
-    } else {
+    if (!n_controllers) {
         union ofp_action action;
         flow_t flow;
 
+        /* Clear out controllers. */
+        ofproto_set_controllers(br->ofproto, NULL, 0);
+
         /* Set up a flow that matches every packet and directs them to
          * OFPP_NORMAL (which goes to us). */
         memset(&action, 0, sizeof action);
@@ -1590,16 +1540,78 @@ bridge_reconfigure_controller(const struct ovsrec_open_vswitch *ovs_cfg,
         action.output.len = htons(sizeof action);
         action.output.port = htons(OFPP_NORMAL);
         memset(&flow, 0, sizeof flow);
-        ofproto_add_flow(br->ofproto, &flow, OFPFW_ALL, 0,
-                         &action, 1, 0);
+        ofproto_add_flow(br->ofproto, &flow, OVSFW_ALL, 0, &action, 1, 0);
+    } else {
+        struct ofproto_controller *ocs;
+        size_t i;
 
-        ofproto_set_in_band(br->ofproto, false);
-        ofproto_set_max_backoff(br->ofproto, 1);
-        ofproto_set_probe_interval(br->ofproto, 5);
-        ofproto_set_failure(br->ofproto, false);
-    }
+        ocs = xmalloc(n_controllers * sizeof *ocs);
+        for (i = 0; i < n_controllers; i++) {
+            struct ovsrec_controller *c = controllers[i];
+            struct ofproto_controller *oc = &ocs[i];
+
+            if (strcmp(c->target, "discover")) {
+                struct iface *local_iface;
+                struct in_addr ip;
+
+                local_iface = bridge_get_local_iface(br);
+                if (local_iface && c->local_ip
+                    && inet_aton(c->local_ip, &ip)) {
+                    struct netdev *netdev = local_iface->netdev;
+                    struct in_addr mask, gateway;
+
+                    if (!c->local_netmask
+                        || !inet_aton(c->local_netmask, &mask)) {
+                        mask.s_addr = 0;
+                    }
+                    if (!c->local_gateway
+                        || !inet_aton(c->local_gateway, &gateway)) {
+                        gateway.s_addr = 0;
+                    }
+
+                    netdev_turn_flags_on(netdev, NETDEV_UP, true);
+                    if (!mask.s_addr) {
+                        mask.s_addr = guess_netmask(ip.s_addr);
+                    }
+                    if (!netdev_set_in4(netdev, ip, mask)) {
+                        VLOG_INFO("bridge %s: configured IP address "IP_FMT", "
+                                  "netmask "IP_FMT,
+                                  br->name, IP_ARGS(&ip.s_addr),
+                                  IP_ARGS(&mask.s_addr));
+                    }
+
+                    if (gateway.s_addr) {
+                        if (!netdev_add_router(netdev, gateway)) {
+                            VLOG_INFO("bridge %s: configured gateway "IP_FMT,
+                                      br->name, IP_ARGS(&gateway.s_addr));
+                        }
+                    }
+                }
+            }
 
-    ofproto_set_controller(br->ofproto, br->controller);
+            oc->target = c->target;
+            oc->max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8;
+            oc->probe_interval = (c->inactivity_probe
+                                 ? *c->inactivity_probe / 1000 : 5);
+            oc->fail = (!c->fail_mode
+                       || !strcmp(c->fail_mode, "standalone")
+                       || !strcmp(c->fail_mode, "open")
+                       ? OFPROTO_FAIL_STANDALONE
+                       : OFPROTO_FAIL_SECURE);
+            oc->band = (!c->connection_mode
+                       || !strcmp(c->connection_mode, "in-band")
+                       ? OFPROTO_IN_BAND
+                       : OFPROTO_OUT_OF_BAND);
+            oc->accept_re = c->discover_accept_regex;
+            oc->update_resolv_conf = c->discover_update_resolv_conf;
+            oc->rate_limit = (c->controller_rate_limit
+                             ? *c->controller_rate_limit : 0);
+            oc->burst_limit = (c->controller_burst_limit
+                              ? *c->controller_burst_limit : 0);
+        }
+        ofproto_set_controllers(br->ofproto, ocs, n_controllers);
+        free(ocs);
+    }
 }
 
 static void
@@ -1848,6 +1860,34 @@ bond_enable_slave(struct iface *iface, bool enable)
     port->bond_compat_is_stale = true;
 }
 
+/* Attempts to make the sum of the bond slaves' statistics appear on the fake
+ * bond interface. */
+static void
+bond_update_fake_iface_stats(struct port *port)
+{
+    struct netdev_stats bond_stats;
+    struct netdev *bond_dev;
+    size_t i;
+
+    memset(&bond_stats, 0, sizeof bond_stats);
+
+    for (i = 0; i < port->n_ifaces; i++) {
+        struct netdev_stats slave_stats;
+
+        if (!netdev_get_stats(port->ifaces[i]->netdev, &slave_stats)) {
+            bond_stats.rx_packets += slave_stats.rx_packets;
+            bond_stats.rx_bytes += slave_stats.rx_bytes;
+            bond_stats.tx_packets += slave_stats.tx_packets;
+            bond_stats.tx_bytes += slave_stats.tx_bytes;
+        }
+    }
+
+    if (!netdev_open_default(port->name, &bond_dev)) {
+        netdev_set_stats(bond_dev, &bond_stats);
+        netdev_close(bond_dev);
+    }
+}
+
 static void
 bond_run(struct bridge *br)
 {
@@ -1863,6 +1903,12 @@ bond_run(struct bridge *br)
                     bond_enable_slave(iface, !iface->enabled);
                 }
             }
+
+            if (port->bond_fake_iface
+                && time_msec() >= port->bond_next_fake_iface_update) {
+                bond_update_fake_iface_stats(port);
+                port->bond_next_fake_iface_update = time_msec() + 1000;
+            }
         }
 
         if (port->bond_compat_is_stale) {
@@ -1888,6 +1934,9 @@ bond_wait(struct bridge *br)
                 poll_timer_wait(iface->delay_expires - time_msec());
             }
         }
+        if (port->bond_fake_iface) {
+            poll_timer_wait(port->bond_next_fake_iface_update - time_msec());
+        }
     }
 }
 
@@ -2171,25 +2220,39 @@ is_bcast_arp_reply(const flow_t *flow)
             && eth_addr_is_broadcast(flow->dl_dst));
 }
 
-/* If the composed actions may be applied to any packet in the given 'flow',
- * returns true.  Otherwise, the actions should only be applied to 'packet', or
- * not at all, if 'packet' was NULL. */
+/* Determines whether packets in 'flow' within 'br' should be forwarded or
+ * dropped.  Returns true if they may be forwarded, false if they should be
+ * dropped.
+ *
+ * If 'have_packet' is true, it indicates that the caller is processing a
+ * received packet.  If 'have_packet' is false, then the caller is just
+ * revalidating an existing flow because configuration has changed.  Either
+ * way, 'have_packet' only affects logging (there is no point in logging errors
+ * during revalidation).
+ *
+ * Sets '*in_portp' to the input port.  This will be a null pointer if
+ * flow->in_port does not designate a known input port (in which case
+ * is_admissible() returns false).
+ *
+ * When returning true, sets '*vlanp' to the effective VLAN of the input
+ * packet, as returned by flow_get_vlan().
+ *
+ * May also add tags to '*tags', although the current implementation only does
+ * so in one special case.
+ */
 static bool
-process_flow(struct bridge *br, const flow_t *flow,
-             const struct ofpbuf *packet, struct odp_actions *actions,
-             tag_type *tags, uint16_t *nf_output_iface)
+is_admissible(struct bridge *br, const flow_t *flow, bool have_packet,
+              tag_type *tags, int *vlanp, struct port **in_portp)
 {
     struct iface *in_iface;
     struct port *in_port;
-    struct port *out_port = NULL; /* By default, drop the packet/flow. */
     int vlan;
-    int out_port_idx;
 
     /* Find the interface and port structure for the received packet. */
     in_iface = iface_from_dp_ifidx(br, flow->in_port);
     if (!in_iface) {
         /* No interface?  Something fishy... */
-        if (packet != NULL) {
+        if (have_packet) {
             /* Odd.  A few possible reasons here:
              *
              * - We deleted an interface but there are still a few packets
@@ -2207,29 +2270,29 @@ process_flow(struct bridge *br, const flow_t *flow,
                          "interface %"PRIu16, br->name, flow->in_port); 
         }
 
-        /* Return without adding any actions, to drop packets on this flow. */
-        return true;
+        *in_portp = NULL;
+        return false;
     }
-    in_port = in_iface->port;
-    vlan = flow_get_vlan(br, flow, in_port, !!packet);
+    *in_portp = in_port = in_iface->port;
+    *vlanp = vlan = flow_get_vlan(br, flow, in_port, have_packet);
     if (vlan < 0) {
-        goto done;
+        return false;
     }
 
     /* Drop frames for reserved multicast addresses. */
     if (eth_addr_is_reserved(flow->dl_dst)) {
-        goto done;
+        return false;
     }
 
     /* Drop frames on ports reserved for mirroring. */
     if (in_port->is_mirror_output_port) {
-        if (packet) {
+        if (have_packet) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                          "%s, which is reserved exclusively for mirroring",
                          br->name, in_port->name);
         }
-        goto done;
+        return false;
     }
 
     /* Packets received on bonds need special attention to avoid duplicates. */
@@ -2240,7 +2303,7 @@ process_flow(struct bridge *br, const flow_t *flow,
             *tags |= in_port->active_iface_tag;
             if (in_port->active_iface != in_iface->port_ifidx) {
                 /* Drop all multicast packets on inactive slaves. */
-                goto done;
+                return false;
             }
         }
 
@@ -2251,10 +2314,32 @@ process_flow(struct bridge *br, const flow_t *flow,
         src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
         if (src_idx != -1 && src_idx != in_port->port_idx &&
             !is_bcast_arp_reply(flow)) {
-                goto done;
+                return false;
         }
     }
 
+    return true;
+}
+
+/* If the composed actions may be applied to any packet in the given 'flow',
+ * returns true.  Otherwise, the actions should only be applied to 'packet', or
+ * not at all, if 'packet' was NULL. */
+static bool
+process_flow(struct bridge *br, const flow_t *flow,
+             const struct ofpbuf *packet, struct odp_actions *actions,
+             tag_type *tags, uint16_t *nf_output_iface)
+{
+    struct port *in_port;
+    struct port *out_port;
+    int vlan;
+    int out_port_idx;
+
+    /* Check whether we should drop packets in this flow. */
+    if (!is_admissible(br, flow, packet != NULL, tags, &vlan, &in_port)) {
+        out_port = NULL;
+        goto done;
+    }
+
     /* Learn source MAC (but don't try to learn from revalidation). */
     if (packet) {
         update_learning_table(br, flow, vlan, in_port);
@@ -2281,8 +2366,10 @@ process_flow(struct bridge *br, const flow_t *flow,
     }
 
 done:
-    compose_actions(br, flow, vlan, in_port, out_port, tags, actions,
-                    nf_output_iface);
+    if (in_port) {
+        compose_actions(br, flow, vlan, in_port, out_port, tags, actions,
+                        nf_output_iface);
+    }
 
     return true;
 }
@@ -2342,18 +2429,16 @@ bridge_account_flow_ofhook_cb(const flow_t *flow,
                               void *br_)
 {
     struct bridge *br = br_;
-    struct port *in_port;
     const union odp_action *a;
+    struct port *in_port;
+    tag_type tags = 0;
+    int vlan;
 
     /* Feed information from the active flows back into the learning table
      * to ensure that table is always in sync with what is actually flowing
      * through the datapath. */
-    in_port = port_from_dp_ifidx(br, flow->in_port);
-    if (in_port) {
-        int vlan = flow_get_vlan(br, flow, in_port, false);
-         if (vlan >= 0) {
-            update_learning_table(br, flow, vlan, in_port);
-        }
+    if (is_admissible(br, flow, false, &tags, &vlan, &in_port)) {
+        update_learning_table(br, flow, vlan, in_port);
     }
 
     if (!br->has_bonded_ports) {
@@ -2743,7 +2828,7 @@ bond_send_learning_packets(struct port *port)
         n_packets++;
         compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
                               e->mac);
-        flow_extract(&packet, ODPP_NONE, &flow);
+        flow_extract(&packet, 0, ODPP_NONE, &flow);
         retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
                                      &packet);
         if (retval) {
@@ -3329,6 +3414,10 @@ port_update_bonding(struct port *port)
             bond_choose_active_iface(port);
             port->bond_next_rebalance
                 = time_msec() + port->bond_rebalance_interval;
+
+            if (port->cfg->bond_fake_iface) {
+                port->bond_next_fake_iface_update = time_msec();
+            }
         }
         port->bond_compat_is_stale = true;
         port->bond_fake_iface = port->cfg->bond_fake_iface;