bridge: Fix segfault when updating learning table for deleted port.

[sliver-openvswitch.git] / vswitchd / bridge.c
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c

index fcdd866..fb6a141 100644 (file)
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -147,7 +147,7 @@ struct port {
  struct bridge {
      struct list node;           /* Node in global list of bridges. */
      char *name;                 /* User-specified arbitrary name. */
-    struct mac_learning *ml;    /* MAC learning table, or null not to learn. */
+    struct mac_learning *ml;    /* MAC learning table. */
      bool sent_config_request;   /* Successfully sent config request? */
      uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
  
@@ -216,6 +216,7 @@ static void bond_run(struct bridge *);
  static void bond_wait(struct bridge *);
  static void bond_rebalance_port(struct port *);
  static void bond_send_learning_packets(struct port *);
+static void bond_enable_slave(struct iface *iface, bool enable);
  
  static void port_create(struct bridge *, const char *name);
  static void port_reconfigure(struct port *);
@@ -495,10 +496,7 @@ bridge_reconfigure(void)
          uint64_t dpid;
          struct iface *local_iface = NULL;
          const char *devname;
-        uint8_t engine_type = br->dpif.minor;
-        uint8_t engine_id = br->dpif.minor;
-        bool add_id_to_iface = false;
-        struct svec nf_hosts;
+        struct netflow_options nf_options;
  
          bridge_fetch_dp_ifaces(br);
          for (i = 0; i < br->n_ports; ) {
@@ -543,35 +541,46 @@ bridge_reconfigure(void)
          ofproto_set_datapath_id(br->ofproto, dpid);
  
          /* Set NetFlow configuration on this bridge. */
+        memset(&nf_options, 0, sizeof nf_options);
+        nf_options.engine_type = br->dpif.minor;
+        nf_options.engine_id = br->dpif.minor;
+        nf_options.active_timeout = -1;
+
          if (cfg_has("netflow.%s.engine-type", br->name)) {
-            engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
+            nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type", 
                      br->name);
          }
          if (cfg_has("netflow.%s.engine-id", br->name)) {
-            engine_id = cfg_get_int(0, "netflow.%s.engine-id", br->name);
+            nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id",
+                                               br->name);
+        }
+        if (cfg_has("netflow.%s.active-timeout", br->name)) {
+            nf_options.active_timeout = cfg_get_int(0,
+                                                    "netflow.%s.active-timeout",
+                                                    br->name);
          }
          if (cfg_has("netflow.%s.add-id-to-iface", br->name)) {
-            add_id_to_iface = cfg_get_bool(0, "netflow.%s.add-id-to-iface",
-                    br->name);
+            nf_options.add_id_to_iface = cfg_get_bool(0,
+                                                   "netflow.%s.add-id-to-iface",
+                                                    br->name);
          }
-        if (add_id_to_iface && engine_id > 0x7f) {
+        if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) {
              VLOG_WARN("bridge %s: netflow port mangling may conflict with "
                      "another vswitch, choose an engine id less than 128", 
                      br->name);
          }
-        if (add_id_to_iface && br->n_ports > 0x1ff) {
+        if (nf_options.add_id_to_iface && br->n_ports > 508) {
              VLOG_WARN("bridge %s: netflow port mangling will conflict with "
-                    "another port when 512 or more ports are used", 
+                    "another port when more than 508 ports are used", 
                      br->name);
          }
-        svec_init(&nf_hosts);
-        cfg_get_all_keys(&nf_hosts, "netflow.%s.host", br->name);
-        if (ofproto_set_netflow(br->ofproto, &nf_hosts,  engine_type, 
-                    engine_id, add_id_to_iface)) {
+        svec_init(&nf_options.collectors);
+        cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name);
+        if (ofproto_set_netflow(br->ofproto, &nf_options)) {
              VLOG_ERR("bridge %s: problem setting netflow collectors", 
                      br->name);
          }
-        svec_destroy(&nf_hosts);
+        svec_destroy(&nf_options.collectors);
  
          /* Update the controller and related settings.  It would be more
           * straightforward to call this from bridge_reconfigure_one(), but we
@@ -837,9 +846,7 @@ bridge_wait(void)
              continue;
          }
  
-        if (br->ml) {
-            mac_learning_wait(br->ml);
-        }
+        mac_learning_wait(br->ml);
          bond_wait(br);
          brstp_wait(br);
      }
@@ -852,9 +859,7 @@ bridge_flush(struct bridge *br)
  {
      COVERAGE_INC(bridge_flush);
      br->flush = true;
-    if (br->ml) {
-        mac_learning_flush(br->ml);
-    }
+    mac_learning_flush(br->ml);
  }
  \f
  /* Bridge unixctl user interface functions. */
@@ -863,6 +868,7 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
      const struct bridge *br;
+    const struct mac_entry *e;
  
      br = bridge_lookup(args);
      if (!br) {
@@ -871,16 +877,13 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
      }
  
      ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
-    if (br->ml) {
-        const struct mac_entry *e;
-        LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
-            if (e->port < 0 || e->port >= br->n_ports) {
-                continue;
-            }
-            ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
-                          br->ports[e->port]->ifaces[0]->dp_ifidx,
-                          e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
+    LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+        if (e->port < 0 || e->port >= br->n_ports) {
+            continue;
          }
+        ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
+                      br->ports[e->port]->ifaces[0]->dp_ifidx,
+                      e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
      }
      unixctl_command_reply(conn, 200, ds_cstr(&ds));
      ds_destroy(&ds);
@@ -1022,9 +1025,7 @@ bridge_run_one(struct bridge *br)
          return error;
      }
  
-    if (br->ml) {
-        mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
-    }
+    mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
      bond_run(br);
      brstp_run(br);
  
@@ -1408,13 +1409,31 @@ lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN])
  static int
  bond_choose_iface(const struct port *port)
  {
-    size_t i;
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    size_t i, best_down_slave = -1;
+    long long next_delay_expiration = LLONG_MAX;
+
      for (i = 0; i < port->n_ifaces; i++) {
-        if (port->ifaces[i]->enabled) {
+        struct iface *iface = port->ifaces[i];
+
+        if (iface->enabled) {
              return i;
+        } else if (iface->delay_expires < next_delay_expiration) {
+            best_down_slave = i;
+            next_delay_expiration = iface->delay_expires;
          }
      }
-    return -1;
+
+    if (best_down_slave != -1) {
+        struct iface *iface = port->ifaces[best_down_slave];
+
+        VLOG_INFO_RL(&rl, "interface %s: skipping remaining %lli ms updelay "
+                     "since no other interface is up", iface->name,
+                     iface->delay_expires - time_msec());
+        bond_enable_slave(iface, true);
+    }
+
+    return best_down_slave;
  }
  
  static bool
@@ -1464,10 +1483,12 @@ bond_link_status_update(struct iface *iface, bool carrier)
          iface->delay_expires = LLONG_MAX;
          VLOG_INFO_RL(&rl, "interface %s: will not be %s",
                       iface->name, carrier ? "disabled" : "enabled");
-    } else if (carrier && port->updelay && port->active_iface < 0) {
-        iface->delay_expires = time_msec();
-        VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
-                     "other interface is up", iface->name, port->updelay);
+    } else if (carrier && port->active_iface < 0) {
+        bond_enable_slave(iface, true);
+        if (port->updelay) {
+            VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+                         "other interface is up", iface->name, port->updelay);
+        }
      } else {
          int delay = carrier ? port->updelay : port->downdelay;
          iface->delay_expires = time_msec() + delay;
@@ -1504,6 +1525,12 @@ bond_enable_slave(struct iface *iface, bool enable)
      struct port *port = iface->port;
      struct bridge *br = port->bridge;
  
+    /* This acts as a recursion check.  If the act of disabling a slave
+     * causes a different slave to be enabled, the flag will allow us to
+     * skip redundant work when we reenter this function.  It must be
+     * cleared on exit to keep things safe with multiple bonds. */
+    static bool moving_active_iface = false;
+
      iface->delay_expires = LLONG_MAX;
      if (enable == iface->enabled) {
          return;
@@ -1516,19 +1543,29 @@ bond_enable_slave(struct iface *iface, bool enable)
          if (iface->port_ifidx == port->active_iface) {
              ofproto_revalidate(br->ofproto,
                                 port->active_iface_tag);
+
+            /* Disabling a slave can lead to another slave being immediately
+             * enabled if there will be no active slaves but one is waiting
+             * on an updelay.  In this case we do not need to run most of the
+             * code for the newly enabled slave since there was no period
+             * without an active slave and it is redundant with the disabling
+             * path. */
+            moving_active_iface = true;
              bond_choose_active_iface(port);
          }
          bond_send_learning_packets(port);
      } else {
          VLOG_WARN("interface %s: enabled", iface->name);
-        if (port->active_iface < 0) {
+        if (port->active_iface < 0 && !moving_active_iface) {
              ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
              bond_choose_active_iface(port);
              bond_send_learning_packets(port);
          }
          iface->tag = tag_create_random();
      }
-    port_update_bond_compat(port);
+
+    moving_active_iface = false;
+    port->bond_compat_is_stale = true;
  }
  
  static void
@@ -1539,20 +1576,19 @@ bond_run(struct bridge *br)
      for (i = 0; i < br->n_ports; i++) {
          struct port *port = br->ports[i];
  
+        if (port->n_ifaces >= 2) {
+            for (j = 0; j < port->n_ifaces; j++) {
+                struct iface *iface = port->ifaces[j];
+                if (time_msec() >= iface->delay_expires) {
+                    bond_enable_slave(iface, !iface->enabled);
+                }
+            }
+        }
+
          if (port->bond_compat_is_stale) {
              port->bond_compat_is_stale = false;
              port_update_bond_compat(port);
          }
-
-        if (port->n_ifaces < 2) {
-            continue;
-        }
-        for (j = 0; j < port->n_ifaces; j++) {
-            struct iface *iface = port->ifaces[j];
-            if (time_msec() >= iface->delay_expires) {
-                bond_enable_slave(iface, !iface->enabled);
-            }
-        }
      }
  }
  
@@ -1795,6 +1831,71 @@ compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan,
      }
  }
  
+/* Returns the effective vlan of a packet, taking into account both the
+ * 802.1Q header and implicitly tagged ports.  A value of 0 indicates that
+ * the packet is untagged and -1 indicates it has an invalid header and
+ * should be dropped. */
+static int flow_get_vlan(struct bridge *br, const flow_t *flow,
+                         struct port *in_port, bool have_packet)
+{
+    /* Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
+     * belongs to VLAN 0, so we should treat both cases identically.  (In the
+     * former case, the packet has an 802.1Q header that specifies VLAN 0,
+     * presumably to allow a priority to be specified.  In the latter case, the
+     * packet does not have any 802.1Q header.) */
+    int vlan = ntohs(flow->dl_vlan);
+    if (vlan == OFP_VLAN_NONE) {
+        vlan = 0;
+    }
+    if (in_port->vlan >= 0) {
+        if (vlan) {
+            /* XXX support double tagging? */
+            if (have_packet) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
+                             "packet received on port %s configured with "
+                             "implicit VLAN %"PRIu16,
+                             br->name, ntohs(flow->dl_vlan),
+                             in_port->name, in_port->vlan);
+            }
+            return -1;
+        }
+        vlan = in_port->vlan;
+    } else {
+        if (!port_includes_vlan(in_port, vlan)) {
+            if (have_packet) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
+                             "packet received on port %s not configured for "
+                             "trunking VLAN %d",
+                             br->name, vlan, in_port->name, vlan);
+            }
+            return -1;
+        }
+    }
+
+    return vlan;
+}
+
+static void
+update_learning_table(struct bridge *br, const flow_t *flow, int vlan,
+                      struct port *in_port)
+{
+    tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
+                                          vlan, in_port->port_idx);
+    if (rev_tag) {
+        /* The log messages here could actually be useful in debugging,
+         * so keep the rate limit relatively high. */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
+                                                                300);
+        VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
+                    "on port %s in VLAN %d",
+                    br->name, ETH_ADDR_ARGS(flow->dl_src),
+                    in_port->name, vlan);
+        ofproto_revalidate(br->ofproto, rev_tag);
+    }
+}
+
  static bool
  is_bcast_arp_reply(const flow_t *flow)
  {
@@ -1815,6 +1916,7 @@ process_flow(struct bridge *br, const flow_t *flow,
      struct port *in_port;
      struct port *out_port = NULL; /* By default, drop the packet/flow. */
      int vlan;
+    int out_port_idx;
  
      /* Find the interface and port structure for the received packet. */
      in_iface = iface_from_dp_ifidx(br, flow->in_port);
@@ -1842,41 +1944,9 @@ process_flow(struct bridge *br, const flow_t *flow,
          return true;
      }
      in_port = in_iface->port;
-
-    /* Figure out what VLAN this packet belongs to.
-     *
-     * Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
-     * belongs to VLAN 0, so we should treat both cases identically.  (In the
-     * former case, the packet has an 802.1Q header that specifies VLAN 0,
-     * presumably to allow a priority to be specified.  In the latter case, the
-     * packet does not have any 802.1Q header.) */
-    vlan = ntohs(flow->dl_vlan);
-    if (vlan == OFP_VLAN_NONE) {
-        vlan = 0;
-    }
-    if (in_port->vlan >= 0) {
-        if (vlan) {
-            /* XXX support double tagging? */
-            if (packet != NULL) {
-                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-                VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
-                             "packet received on port %s configured with "
-                             "implicit VLAN %"PRIu16,
-                             br->name, ntohs(flow->dl_vlan),
-                             in_port->name, in_port->vlan);
-            }
-            goto done;
-        }
-        vlan = in_port->vlan;
-    } else {
-        if (!port_includes_vlan(in_port, vlan)) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-            VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
-                         "packet received on port %s not configured for "
-                         "trunking VLAN %d",
-                         br->name, vlan, in_port->name, vlan);
-            goto done;
-        }
+    vlan = flow_get_vlan(br, flow, in_port, !!packet);
+    if (vlan < 0) {
+        goto done;
      }
  
      /* Drop frames for ports that STP wants entirely killed (both for
@@ -1925,37 +1995,23 @@ process_flow(struct bridge *br, const flow_t *flow,
  
      /* MAC learning. */
      out_port = FLOOD_PORT;
-    if (br->ml) {
-        int out_port_idx;
-
-        /* Learn source MAC (but don't try to learn from revalidation). */
-        if (packet) {
-            tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
-                                                  vlan, in_port->port_idx);
-            if (rev_tag) {
-                /* The log messages here could actually be useful in debugging,
-                 * so keep the rate limit relatively high. */
-                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
-                                                                        300);
-                VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
-                            "on port %s in VLAN %d",
-                            br->name, ETH_ADDR_ARGS(flow->dl_src),
-                            in_port->name, vlan);
-                ofproto_revalidate(br->ofproto, rev_tag);
-            }
-        }
-
-        /* Determine output port. */
-        out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
-                                               tags);
-        if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
-            out_port = br->ports[out_port_idx];
-        } else if (!packet) {
-            /* If we are revalidating but don't have a learning entry then
-             * eject the flow.  Installing a flow that floods packets will
-             * prevent us from seeing future packets and learning properly. */
-            return false;
-        }
+    /* Learn source MAC (but don't try to learn from revalidation). */
+    if (packet) {
+        update_learning_table(br, flow, vlan, in_port);
+    }
+
+    /* Determine output port. */
+    out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
+                                           tags);
+    if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
+        out_port = br->ports[out_port_idx];
+    } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) {
+        /* If we are revalidating but don't have a learning entry then
+         * eject the flow.  Installing a flow that floods packets opens
+         * up a window of time where we could learn from a packet reflected
+         * on a bond and blackhole packets before the learning table is
+         * updated to reflect the correct port. */
+        return false;
      }
  
      /* Don't send packets out their input ports.  Don't forward frames that STP
@@ -2035,17 +2091,30 @@ bridge_account_flow_ofhook_cb(const flow_t *flow,
                                void *br_)
  {
      struct bridge *br = br_;
+    struct port *in_port;
      const union odp_action *a;
  
+    /* Feed information from the active flows back into the learning table
+     * to ensure that table is always in sync with what is actually flowing
+     * through the datapath. */
+    in_port = port_from_dp_ifidx(br, flow->in_port);
+    if (in_port) {
+        int vlan = flow_get_vlan(br, flow, in_port, false);
+         if (vlan >= 0) {
+            update_learning_table(br, flow, vlan, in_port);
+        }
+    }
+
      if (!br->has_bonded_ports) {
          return;
      }
  
      for (a = actions; a < &actions[n_actions]; a++) {
          if (a->type == ODPAT_OUTPUT) {
-            struct port *port = port_from_dp_ifidx(br, a->output.port);
-            if (port && port->n_ifaces >= 2) {
-                struct bond_entry *e = lookup_bond_entry(port, flow->dl_src);
+            struct port *out_port = port_from_dp_ifidx(br, a->output.port);
+            if (out_port && out_port->n_ifaces >= 2) {
+                struct bond_entry *e = lookup_bond_entry(out_port,
+                                                         flow->dl_src);
                  e->tx_bytes += n_bytes;
              }
          }
@@ -2391,7 +2460,7 @@ bond_send_learning_packets(struct port *port)
      struct ofpbuf packet;
      int error, n_packets, n_errors;
  
-    if (!port->n_ifaces || port->active_iface < 0 || !br->ml) {
+    if (!port->n_ifaces || port->active_iface < 0) {
          return;
      }
  
@@ -2542,14 +2611,10 @@ bond_unixctl_show(struct unixctl_conn *conn, const char *args)
                  continue;
              }
  
-            ds_put_format(&ds, "\thash %d: %lld kB load\n",
+            ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n",
                            hash, be->tx_bytes / 1024);
  
              /* MACs. */
-            if (!port->bridge->ml) {
-                break;
-            }
-
              LIST_FOR_EACH (me, struct mac_entry, lru_node,
                             &port->bridge->ml->lrus) {
                  uint16_t dp_ifidx;
@@ -3236,7 +3301,8 @@ static void
  mirror_reconfigure(struct bridge *br)
  {
      struct svec old_mirrors, new_mirrors;
-    size_t i;
+    size_t i, n_rspan_vlans;
+    unsigned long *rspan_vlans;
  
      /* Collect old and new mirrors. */
      svec_init(&old_mirrors);
@@ -3285,6 +3351,29 @@ mirror_reconfigure(struct bridge *br)
              m->out_port->is_mirror_output_port = true;
          }
      }
+
+    /* Update learning disabled vlans (for RSPAN). */
+    rspan_vlans = NULL;
+    n_rspan_vlans = cfg_count("vlan.%s.disable-learning", br->name);
+    if (n_rspan_vlans) {
+        rspan_vlans = bitmap_allocate(4096);
+
+        for (i = 0; i < n_rspan_vlans; i++) {
+            int vlan = cfg_get_vlan(i, "vlan.%s.disable-learning", br->name);
+            if (vlan >= 0) {
+                bitmap_set1(rspan_vlans, vlan);
+                VLOG_INFO("bridge %s: disabling learning on vlan %d\n",
+                          br->name, vlan);
+            } else {
+                VLOG_ERR("bridge %s: invalid value '%s' for learning disabled "
+                         "VLAN", br->name,
+                       cfg_get_string(i, "vlan.%s.disable-learning", br->name));
+            }
+        }
+    }
+    if (mac_learning_set_disabled_vlans(br->ml, rspan_vlans)) {
+        bridge_flush(br);
+    }
  }
  
  static void