#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
+#include "ofproto/netflow.h"
#include "ofproto/ofproto.h"
#include "packets.h"
#include "poll-loop.h"
tag_type active_iface_tag; /* Tag for bcast flows. */
tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */
int updelay, downdelay; /* Delay before iface goes up/down, in ms. */
+ bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */
/* Port mirroring info. */
mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */
struct bridge {
struct list node; /* Node in global list of bridges. */
char *name; /* User-specified arbitrary name. */
- struct mac_learning *ml; /* MAC learning table, or null not to learn. */
+ struct mac_learning *ml; /* MAC learning table. */
bool sent_config_request; /* Successfully sent config request? */
uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
static struct bridge *bridge_create(const char *name);
static void bridge_destroy(struct bridge *);
static struct bridge *bridge_lookup(const char *name);
+static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *);
static int bridge_run_one(struct bridge *);
static void bridge_reconfigure_one(struct bridge *);
static void bridge_reconfigure_controller(struct bridge *);
static void bridge_flush(struct bridge *);
static void bridge_pick_local_hw_addr(struct bridge *,
uint8_t ea[ETH_ADDR_LEN],
- const char **devname);
+ struct iface **hw_addr_iface);
static uint64_t bridge_pick_datapath_id(struct bridge *,
const uint8_t bridge_ea[ETH_ADDR_LEN],
- const char *devname);
+ struct iface *hw_addr_iface);
+static struct iface *bridge_get_local_iface(struct bridge *);
static uint64_t dpid_from_hash(const void *, size_t nbytes);
static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args);
static void bond_wait(struct bridge *);
static void bond_rebalance_port(struct port *);
static void bond_send_learning_packets(struct port *);
+static void bond_enable_slave(struct iface *iface, bool enable);
static void port_create(struct bridge *, const char *name);
static void port_reconfigure(struct port *);
static struct iface *iface_lookup(const struct bridge *, const char *name);
static struct iface *iface_from_dp_ifidx(const struct bridge *,
uint16_t dp_ifidx);
+static bool iface_is_internal(const struct bridge *, const char *name);
+static void iface_set_mac(struct iface *);
/* Hooks into ofproto processing. */
static struct ofhooks bridge_ofhooks;
unixctl_command_register("fdb/show", bridge_unixctl_fdb_show);
+ svec_init(&dpif_names);
dp_enumerate(&dpif_names);
for (i = 0; i < dpif_names.n; i++) {
const char *dpif_name = dpif_names.names[i];
dpif_close(dpif);
}
}
+ svec_destroy(&dpif_names);
+
+ unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows);
bond_init();
bridge_reconfigure();
}
static bool
-check_iface_dp_ifidx(struct bridge *br, struct iface *iface,
- void *local_ifacep_)
+check_iface_dp_ifidx(struct bridge *br, struct iface *iface, void *aux UNUSED)
{
- struct iface **local_ifacep = local_ifacep_;
-
if (iface->dp_ifidx >= 0) {
- if (iface->dp_ifidx == ODPP_LOCAL) {
- *local_ifacep = iface;
- }
VLOG_DBG("%s has interface %s on port %d",
dpif_name(br->dpif),
iface->name, iface->dp_ifidx);
}
static bool
-set_iface_policing(struct bridge *br UNUSED, struct iface *iface,
+set_iface_properties(struct bridge *br UNUSED, struct iface *iface,
void *aux UNUSED)
{
- int rate = cfg_get_int(0, "port.%s.ingress.policing-rate", iface->name);
- int burst = cfg_get_int(0, "port.%s.ingress.policing-burst", iface->name);
+ int rate, burst;
+
+ /* Applies per-interface settings to 'iface' and always returns true.
+ *
+ * Set policing attributes. The rate and burst are looked up with
+ * cfg_get_int() under the port.<name>.ingress.policing-rate and
+ * port.<name>.ingress.policing-burst keys, where <name> is 'iface->name',
+ * and handed to netdev_set_policing(). */
+ rate = cfg_get_int(0, "port.%s.ingress.policing-rate", iface->name);
+ burst = cfg_get_int(0, "port.%s.ingress.policing-burst", iface->name);
netdev_set_policing(iface->netdev, rate, burst);
+
+ /* Set MAC address of internal interfaces other than the local
+ * interface. The interface whose 'dp_ifidx' is ODPP_LOCAL is skipped
+ * here even if iface_is_internal() would report it as internal.
+ *
+ * NOTE(review): 'br' is still marked UNUSED in the signature although it
+ * is now passed to iface_is_internal() below -- confirm whether the
+ * annotation should be dropped. */
+ if (iface->dp_ifidx != ODPP_LOCAL
+ && iface_is_internal(br, iface->name)) {
+ iface_set_mac(iface);
+ }
+
return true;
}
for (i = 0; i < add_ifaces.n; i++) {
const char *if_name = add_ifaces.names[i];
- int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
- int flags = internal ? ODP_PORT_INTERNAL : 0;
- int error = dpif_port_add(br->dpif, if_name, flags, NULL);
- if (error == EXFULL) {
+ bool internal;
+ int error;
+
+ /* Add to datapath. */
+ internal = iface_is_internal(br, if_name);
+ error = dpif_port_add(br->dpif, if_name,
+ internal ? ODP_PORT_INTERNAL : 0, NULL);
+ if (error == EFBIG) {
VLOG_ERR("ran out of valid port numbers on %s",
dpif_name(br->dpif));
break;
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
uint8_t ea[8];
uint64_t dpid;
- struct iface *local_iface = NULL;
- const char *devname;
- uint8_t engine_type, engine_id;
- bool add_id_to_iface = false;
- struct svec nf_hosts;
+ struct iface *local_iface;
+ struct iface *hw_addr_iface;
+ struct netflow_options nf_options;
bridge_fetch_dp_ifaces(br);
iterate_and_prune_ifaces(br, init_iface_netdev, NULL);
- local_iface = NULL;
- iterate_and_prune_ifaces(br, check_iface_dp_ifidx, &local_iface);
+ iterate_and_prune_ifaces(br, check_iface_dp_ifidx, NULL);
/* Pick local port hardware address, datapath ID. */
- bridge_pick_local_hw_addr(br, ea, &devname);
+ bridge_pick_local_hw_addr(br, ea, &hw_addr_iface);
+ local_iface = bridge_get_local_iface(br);
if (local_iface) {
- int error = netdev_nodev_set_etheraddr(local_iface->name, ea);
+ int error = netdev_set_etheraddr(local_iface->netdev, ea);
if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "bridge %s: failed to set bridge "
}
}
- dpid = bridge_pick_datapath_id(br, ea, devname);
+ dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface);
ofproto_set_datapath_id(br->ofproto, dpid);
/* Set NetFlow configuration on this bridge. */
- dpif_get_netflow_ids(br->dpif, &engine_type, &engine_id);
+ memset(&nf_options, 0, sizeof nf_options);
+ dpif_get_netflow_ids(br->dpif, &nf_options.engine_type,
+ &nf_options.engine_id);
+ nf_options.active_timeout = -1;
+
if (cfg_has("netflow.%s.engine-type", br->name)) {
- engine_type = cfg_get_int(0, "netflow.%s.engine-type",
+ nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type",
br->name);
}
if (cfg_has("netflow.%s.engine-id", br->name)) {
- engine_id = cfg_get_int(0, "netflow.%s.engine-id", br->name);
+ nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id",
+ br->name);
+ }
+ if (cfg_has("netflow.%s.active-timeout", br->name)) {
+ nf_options.active_timeout = cfg_get_int(0,
+ "netflow.%s.active-timeout",
+ br->name);
}
if (cfg_has("netflow.%s.add-id-to-iface", br->name)) {
- add_id_to_iface = cfg_get_bool(0, "netflow.%s.add-id-to-iface",
- br->name);
+ nf_options.add_id_to_iface = cfg_get_bool(0,
+ "netflow.%s.add-id-to-iface",
+ br->name);
}
- if (add_id_to_iface && engine_id > 0x7f) {
+ if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) {
VLOG_WARN("bridge %s: netflow port mangling may conflict with "
"another vswitch, choose an engine id less than 128",
br->name);
}
- if (add_id_to_iface && br->n_ports > 0x1ff) {
+ if (nf_options.add_id_to_iface && br->n_ports > 508) {
VLOG_WARN("bridge %s: netflow port mangling will conflict with "
- "another port when 512 or more ports are used",
+ "another port when more than 508 ports are used",
br->name);
}
- svec_init(&nf_hosts);
- cfg_get_all_keys(&nf_hosts, "netflow.%s.host", br->name);
- if (ofproto_set_netflow(br->ofproto, &nf_hosts, engine_type,
- engine_id, add_id_to_iface)) {
+ svec_init(&nf_options.collectors);
+ cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name);
+ if (ofproto_set_netflow(br->ofproto, &nf_options)) {
VLOG_ERR("bridge %s: problem setting netflow collectors",
br->name);
}
+ svec_destroy(&nf_options.collectors);
/* Update the controller and related settings. It would be more
* straightforward to call this from bridge_reconfigure_one(), but we
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
+
port_update_vlan_compat(port);
port_update_bonding(port);
}
}
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
brstp_reconfigure(br);
- iterate_and_prune_ifaces(br, set_iface_policing, NULL);
+ iterate_and_prune_ifaces(br, set_iface_properties, NULL);
}
}
static void
bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN],
- const char **devname)
+ struct iface **hw_addr_iface)
{
uint64_t requested_ea;
size_t i, j;
int error;
- *devname = NULL;
+ *hw_addr_iface = NULL;
/* Did the user request a particular MAC? */
requested_ea = cfg_get_mac(0, "bridge.%s.mac", br->name);
memset(ea, 0xff, sizeof ea);
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
+ uint8_t iface_ea[ETH_ADDR_LEN];
+ uint64_t iface_ea_u64;
+ struct iface *iface;
+
+ /* Mirror output ports don't participate. */
if (port->is_mirror_output_port) {
continue;
}
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- uint8_t iface_ea[ETH_ADDR_LEN];
+
+ /* Choose the MAC address to represent the port. */
+ iface_ea_u64 = cfg_get_mac(0, "port.%s.mac", port->name);
+ if (iface_ea_u64) {
+ /* User specified explicitly. */
+ eth_addr_from_uint64(iface_ea_u64, iface_ea);
+
+ /* Find the interface with this Ethernet address (if any) so that
+ * we can report it to the caller through 'hw_addr_iface'. */
+ iface = NULL;
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *candidate = port->ifaces[j];
+ uint8_t candidate_ea[ETH_ADDR_LEN];
+ if (!netdev_get_etheraddr(candidate->netdev, candidate_ea)
+ && eth_addr_equals(iface_ea, candidate_ea)) {
+ iface = candidate;
+ }
+ }
+ } else {
+ /* Choose the interface whose MAC address will represent the port.
+ * The Linux kernel bonding code always chooses the MAC address of
+ * the first slave added to a bond, and the Fedora networking
+ * scripts always add slaves to a bond in alphabetical order, so
+ * for compatibility we choose the interface with the name that is
+ * first in alphabetical order. */
+ iface = port->ifaces[0];
+ for (j = 1; j < port->n_ifaces; j++) {
+ struct iface *candidate = port->ifaces[j];
+ if (strcmp(candidate->name, iface->name) < 0) {
+ iface = candidate;
+ }
+ }
+
+ /* The local port doesn't count (since we're trying to choose its
+ * MAC address anyway). Other internal ports don't count because
+ * we really want a physical MAC if we can get it, and internal
+ * ports typically have randomly generated MACs. */
if (iface->dp_ifidx == ODPP_LOCAL
|| cfg_get_bool(0, "iface.%s.internal", iface->name)) {
continue;
}
- error = netdev_nodev_get_etheraddr(iface->name, iface_ea);
- if (!error) {
- if (!eth_addr_is_multicast(iface_ea) &&
- !eth_addr_is_reserved(iface_ea) &&
- !eth_addr_is_zero(iface_ea) &&
- memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) {
- memcpy(ea, iface_ea, ETH_ADDR_LEN);
- *devname = iface->name;
- }
- } else {
+
+ /* Grab MAC. */
+ error = netdev_get_etheraddr(iface->netdev, iface_ea);
+ if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "failed to obtain Ethernet address of %s: %s",
iface->name, strerror(error));
+ continue;
}
}
+
+ /* Compare against our current choice. */
+ if (!eth_addr_is_multicast(iface_ea) &&
+ !eth_addr_is_reserved(iface_ea) &&
+ !eth_addr_is_zero(iface_ea) &&
+ memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0)
+ {
+ memcpy(ea, iface_ea, ETH_ADDR_LEN);
+ *hw_addr_iface = iface;
+ }
}
if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) {
memcpy(ea, br->default_ea, ETH_ADDR_LEN);
- *devname = NULL;
+ *hw_addr_iface = NULL;
VLOG_WARN("bridge %s: using default bridge Ethernet "
"address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea));
} else {
/* Chooses and returns the datapath ID for bridge 'br' given that the bridge
* Ethernet address is 'bridge_ea'. If 'bridge_ea' is the Ethernet address of
- * a network device, then that network device's name must be passed in as
- * 'devname'; if 'bridge_ea' was derived some other way, then 'devname' must be
- * passed in as a null pointer. */
+ * an interface on 'br', then that interface must be passed in as
+ * 'hw_addr_iface'; if 'bridge_ea' was derived some other way, then
+ * 'hw_addr_iface' must be passed in as a null pointer. */
static uint64_t
bridge_pick_datapath_id(struct bridge *br,
const uint8_t bridge_ea[ETH_ADDR_LEN],
- const char *devname)
+ struct iface *hw_addr_iface)
{
/*
* The procedure for choosing a bridge MAC address will, in the most
return dpid;
}
- if (devname) {
+ if (hw_addr_iface) {
int vlan;
- if (!netdev_get_vlan_vid(devname, &vlan)) {
+ if (!netdev_get_vlan_vid(hw_addr_iface->netdev, &vlan)) {
/*
* A bridge whose MAC address is taken from a VLAN network device
* (that is, a network device created with vconfig(8) or similar
continue;
}
- if (br->ml) {
- mac_learning_wait(br->ml);
- }
+ mac_learning_wait(br->ml);
bond_wait(br);
brstp_wait(br);
}
{
COVERAGE_INC(bridge_flush);
br->flush = true;
- if (br->ml) {
- mac_learning_flush(br->ml);
+ mac_learning_flush(br->ml);
+}
+
+/* Returns the 'br' interface for the ODPP_LOCAL port, or null if 'br' has no
+ * such interface.
+ *
+ * Walks every interface of every port on 'br' and returns the first one
+ * whose 'dp_ifidx' equals ODPP_LOCAL. */
+static struct iface *
+bridge_get_local_iface(struct bridge *br)
+{
+ size_t i, j; /* 'i' indexes br->ports, 'j' indexes port->ifaces. */
+
+ for (i = 0; i < br->n_ports; i++) {
+ struct port *port = br->ports[i];
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *iface = port->ifaces[j];
+ if (iface->dp_ifidx == ODPP_LOCAL) {
+ return iface;
+ }
+ }
}
+
+ return NULL; /* No interface on 'br' has dp_ifidx == ODPP_LOCAL. */
}
\f
/* Bridge unixctl user interface functions. */
{
struct ds ds = DS_EMPTY_INITIALIZER;
const struct bridge *br;
+ const struct mac_entry *e;
br = bridge_lookup(args);
if (!br) {
}
ds_put_cstr(&ds, " port VLAN MAC Age\n");
- if (br->ml) {
- const struct mac_entry *e;
- LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
- if (e->port < 0 || e->port >= br->n_ports) {
- continue;
- }
- ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
- br->ports[e->port]->ifaces[0]->dp_ifidx,
- e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
+ LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+ if (e->port < 0 || e->port >= br->n_ports) {
+ continue;
}
+ ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
+ br->ports[e->port]->ifaces[0]->dp_ifidx,
+ e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
}
unixctl_command_reply(conn, 200, ds_cstr(&ds));
ds_destroy(&ds);
return br ? ofproto_get_datapath_id(br->ofproto) : 0;
}
+/* Handle requests for a listing of all flows known by the OpenFlow
+ * stack, including those normally hidden.
+ *
+ * 'args' is passed to bridge_lookup() to find the bridge. If no bridge is
+ * found, replies on 'conn' with code 501 and "Unknown bridge". Otherwise
+ * replies with code 200 and the text that ofproto_get_all_flows() placed in
+ * 'results' for the bridge's ofproto. */
+static void
+bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args)
+{
+ struct bridge *br;
+ struct ds results; /* Accumulates the reply text. */
+
+ br = bridge_lookup(args);
+ if (!br) {
+ unixctl_command_reply(conn, 501, "Unknown bridge");
+ return;
+ }
+
+ ds_init(&results);
+ ofproto_get_all_flows(br->ofproto, &results);
+
+ unixctl_command_reply(conn, 200, ds_cstr(&results));
+ ds_destroy(&results);
+}
+
static int
bridge_run_one(struct bridge *br)
{
return error;
}
- if (br->ml) {
- mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
- }
+ mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
bond_run(br);
brstp_run(br);
cfg_get_string(0, "%s.accept-regex", pfx),
update_resolv_conf);
} else {
- char local_name[IF_NAMESIZE];
- struct netdev *netdev;
+ struct iface *local_iface;
bool in_band;
- int error;
in_band = (!cfg_is_valid(CFG_BOOL | CFG_REQUIRED,
"%s.in-band", pfx)
ofproto_set_discovery(br->ofproto, false, NULL, NULL);
ofproto_set_in_band(br->ofproto, in_band);
- error = dpif_port_get_name(br->dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
- if (!error) {
- error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
- }
- if (!error) {
- if (cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) {
- struct in_addr ip, mask, gateway;
- ip.s_addr = cfg_get_ip(0, "%s.ip", pfx);
- mask.s_addr = cfg_get_ip(0, "%s.netmask", pfx);
- gateway.s_addr = cfg_get_ip(0, "%s.gateway", pfx);
-
- netdev_turn_flags_on(netdev, NETDEV_UP, true);
- if (!mask.s_addr) {
- mask.s_addr = guess_netmask(ip.s_addr);
- }
- if (!netdev_set_in4(netdev, ip, mask)) {
- VLOG_INFO("bridge %s: configured IP address "IP_FMT", "
- "netmask "IP_FMT,
- br->name, IP_ARGS(&ip.s_addr),
- IP_ARGS(&mask.s_addr));
- }
+ local_iface = bridge_get_local_iface(br);
+ if (local_iface
+ && cfg_is_valid(CFG_IP | CFG_REQUIRED, "%s.ip", pfx)) {
+ struct netdev *netdev = local_iface->netdev;
+ struct in_addr ip, mask, gateway;
+ ip.s_addr = cfg_get_ip(0, "%s.ip", pfx);
+ mask.s_addr = cfg_get_ip(0, "%s.netmask", pfx);
+ gateway.s_addr = cfg_get_ip(0, "%s.gateway", pfx);
+
+ netdev_turn_flags_on(netdev, NETDEV_UP, true);
+ if (!mask.s_addr) {
+ mask.s_addr = guess_netmask(ip.s_addr);
+ }
+ if (!netdev_set_in4(netdev, ip, mask)) {
+ VLOG_INFO("bridge %s: configured IP address "IP_FMT", "
+ "netmask "IP_FMT,
+ br->name, IP_ARGS(&ip.s_addr),
+ IP_ARGS(&mask.s_addr));
+ }
- if (gateway.s_addr) {
- if (!netdev_add_router(netdev, gateway)) {
- VLOG_INFO("bridge %s: configured gateway "IP_FMT,
- br->name, IP_ARGS(&gateway.s_addr));
- }
+ if (gateway.s_addr) {
+ if (!netdev_add_router(netdev, gateway)) {
+ VLOG_INFO("bridge %s: configured gateway "IP_FMT,
+ br->name, IP_ARGS(&gateway.s_addr));
}
}
- netdev_close(netdev);
}
}
if (probe < 5) {
probe = cfg_get_int(0, "mgmt.inactivity-probe");
if (probe < 5) {
- probe = 15;
+ probe = 5;
}
}
ofproto_set_probe_interval(br->ofproto, probe);
if (!max_backoff) {
max_backoff = cfg_get_int(0, "mgmt.max-backoff");
if (!max_backoff) {
- max_backoff = 15;
+ max_backoff = 8;
}
}
ofproto_set_max_backoff(br->ofproto, max_backoff);
struct iface *iface = port->ifaces[j];
svec_add(ifaces, iface->name);
}
+ if (port->n_ifaces > 1
+ && cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+ svec_add(ifaces, port->name);
+ }
}
- svec_sort(ifaces);
- assert(svec_is_unique(ifaces));
+ svec_sort_unique(ifaces);
}
/* For robustness, in case the administrator moves around datapath ports behind
static int
bond_choose_iface(const struct port *port)
{
- size_t i;
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+ size_t i, best_down_slave = -1;
+ long long next_delay_expiration = LLONG_MAX;
+ /* Returns the index in 'port->ifaces' of the first enabled interface.
+ * While scanning, remembers as a fallback the disabled interface whose
+ * 'delay_expires' is smallest; an interface whose 'delay_expires' is
+ * LLONG_MAX is never picked because the comparison is strict. */
for (i = 0; i < port->n_ifaces; i++) {
- if (port->ifaces[i]->enabled) {
+ struct iface *iface = port->ifaces[i];
+
+ if (iface->enabled) {
return i;
+ } else if (iface->delay_expires < next_delay_expiration) {
+ best_down_slave = i;
+ next_delay_expiration = iface->delay_expires;
}
}
- return -1;
+ /* No interface is enabled. If a fallback was found, enable it right away
+ * instead of waiting for its delay to expire, and return its index;
+ * otherwise 'best_down_slave' still holds its initial value.
+ *
+ * NOTE(review): 'best_down_slave' is a size_t, so the initial -1 is stored
+ * as SIZE_MAX; the comparison with -1 below and the int return value both
+ * depend on that value converting back to -1 -- confirm this is
+ * intended. */
+ if (best_down_slave != -1) {
+ struct iface *iface = port->ifaces[best_down_slave];
+
+ VLOG_INFO_RL(&rl, "interface %s: skipping remaining %lli ms updelay "
+ "since no other interface is up", iface->name,
+ iface->delay_expires - time_msec());
+ bond_enable_slave(iface, true);
+ }
+
+ return best_down_slave;
}
static bool
return false;
}
e->iface_tag = tag_create_random();
+ ((struct port *) port)->bond_compat_is_stale = true;
}
*tags |= e->iface_tag;
iface = port->ifaces[e->iface_idx];
iface->delay_expires = LLONG_MAX;
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
iface->name, carrier ? "disabled" : "enabled");
- } else if (carrier && port->updelay && port->active_iface < 0) {
- iface->delay_expires = time_msec();
- VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
- "other interface is up", iface->name, port->updelay);
+ } else if (carrier && port->active_iface < 0) {
+ bond_enable_slave(iface, true);
+ if (port->updelay) {
+ VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+ "other interface is up", iface->name, port->updelay);
+ }
} else {
int delay = carrier ? port->updelay : port->downdelay;
iface->delay_expires = time_msec() + delay;
struct port *port = iface->port;
struct bridge *br = port->bridge;
+ /* This acts as a recursion check. If the act of disabling a slave
+ * causes a different slave to be enabled, the flag will allow us to
+ * skip redundant work when we reenter this function. It must be
+ * cleared on exit to keep things safe with multiple bonds. */
+ static bool moving_active_iface = false;
+
iface->delay_expires = LLONG_MAX;
if (enable == iface->enabled) {
return;
if (iface->port_ifidx == port->active_iface) {
ofproto_revalidate(br->ofproto,
port->active_iface_tag);
+
+ /* Disabling a slave can lead to another slave being immediately
+ * enabled if there will be no active slaves but one is waiting
+ * on an updelay. In this case we do not need to run most of the
+ * code for the newly enabled slave since there was no period
+ * without an active slave and it is redundant with the disabling
+ * path. */
+ moving_active_iface = true;
bond_choose_active_iface(port);
}
bond_send_learning_packets(port);
} else {
VLOG_WARN("interface %s: enabled", iface->name);
- if (port->active_iface < 0) {
+ if (port->active_iface < 0 && !moving_active_iface) {
ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
bond_choose_active_iface(port);
bond_send_learning_packets(port);
}
iface->tag = tag_create_random();
}
+
+ moving_active_iface = false;
+ port->bond_compat_is_stale = true;
}
static void
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
- if (port->n_ifaces < 2) {
- continue;
- }
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- if (time_msec() >= iface->delay_expires) {
- bond_enable_slave(iface, !iface->enabled);
+
+ if (port->n_ifaces >= 2) {
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *iface = port->ifaces[j];
+ if (time_msec() >= iface->delay_expires) {
+ bond_enable_slave(iface, !iface->enabled);
+ }
}
}
+
+ if (port->bond_compat_is_stale) {
+ port->bond_compat_is_stale = false;
+ port_update_bond_compat(port);
+ }
}
}
static size_t
compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
- struct dst dsts[], tag_type *tags)
+ struct dst dsts[], tag_type *tags, uint16_t *nf_output_iface)
{
mirror_mask_t mirrors = in_port->src_mirrors;
struct dst *dst = dsts;
dst++;
}
}
+ *nf_output_iface = NF_OUT_FLOOD;
} else if (out_port && set_dst(dst, flow, in_port, out_port, tags)) {
+ *nf_output_iface = dst->dp_ifidx;
mirrors |= out_port->dst_mirrors;
dst++;
}
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port_includes_vlan(port, m->out_vlan)
- && set_dst(dst, flow, in_port, port, tags)
- && !dst_is_duplicate(dsts, dst - dsts, dst))
+ && set_dst(dst, flow, in_port, port, tags))
{
+ int flow_vlan;
+
if (port->vlan < 0) {
dst->vlan = m->out_vlan;
}
- if (dst->dp_ifidx == flow->in_port
- && dst->vlan == vlan) {
+ if (dst_is_duplicate(dsts, dst - dsts, dst)) {
+ continue;
+ }
+
+ /* Use the vlan tag on the original flow instead of
+ * the one passed in the vlan parameter. This ensures
+ * that we compare the vlan from before any implicit
+ * tagging takes place. This is necessary because
+ * dst->vlan is the final vlan, after removing implicit
+ * tags. */
+ flow_vlan = ntohs(flow->dl_vlan);
+ if (flow_vlan == 0) {
+ flow_vlan = OFP_VLAN_NONE;
+ }
+ if (port == in_port && dst->vlan == flow_vlan) {
/* Don't send out input port on same VLAN. */
continue;
}
static void
compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
- tag_type *tags, struct odp_actions *actions)
+ tag_type *tags, struct odp_actions *actions,
+ uint16_t *nf_output_iface)
{
struct dst dsts[DP_MAX_PORTS * (MAX_MIRRORS + 1)];
size_t n_dsts;
const struct dst *p;
uint16_t cur_vlan;
- n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags);
+ n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags,
+ nf_output_iface);
cur_vlan = ntohs(flow->dl_vlan);
for (p = dsts; p < &dsts[n_dsts]; p++) {
}
}
+/* Returns the effective vlan of a packet, taking into account both the
+ * 802.1Q header and implicitly tagged ports. A value of 0 indicates that
+ * the packet is untagged and -1 indicates it has an invalid header and
+ * should be dropped.
+ *
+ * If 'in_port->vlan' is nonnegative, a flow whose own VLAN is nonzero is
+ * rejected with -1, and any other flow is assigned 'in_port->vlan'.
+ * Otherwise the flow's own VLAN is returned, provided port_includes_vlan()
+ * accepts it for 'in_port'; if not, -1 is returned.
+ *
+ * 'have_packet' only controls logging: the rate-limited drop warnings are
+ * emitted only when it is true. 'br' is used only for its name in those
+ * warnings. */
+static int flow_get_vlan(struct bridge *br, const flow_t *flow,
+ struct port *in_port, bool have_packet)
+{
+ /* Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
+ * belongs to VLAN 0, so we should treat both cases identically. (In the
+ * former case, the packet has an 802.1Q header that specifies VLAN 0,
+ * presumably to allow a priority to be specified. In the latter case, the
+ * packet does not have any 802.1Q header.) */
+ int vlan = ntohs(flow->dl_vlan);
+ if (vlan == OFP_VLAN_NONE) {
+ vlan = 0;
+ }
+ if (in_port->vlan >= 0) {
+ if (vlan) {
+ /* XXX support double tagging?
+ *
+ * NOTE(review): 'in_port->vlan' is tested as a signed value above
+ * but printed with PRIu16 below -- confirm against its declared
+ * type. */
+ if (have_packet) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
+ "packet received on port %s configured with "
+ "implicit VLAN %"PRIu16,
+ br->name, ntohs(flow->dl_vlan),
+ in_port->name, in_port->vlan);
+ }
+ return -1;
+ }
+ vlan = in_port->vlan; /* VLAN-0 traffic takes the port's VLAN. */
+ } else {
+ if (!port_includes_vlan(in_port, vlan)) {
+ if (have_packet) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
+ "packet received on port %s not configured for "
+ "trunking VLAN %d",
+ br->name, vlan, in_port->name, vlan);
+ }
+ return -1;
+ }
+ }
+
+ return vlan;
+}
+/* Feeds the source MAC of 'flow' into the learning table of 'br'.
+ *
+ * Calls mac_learning_learn() on 'br->ml' with 'flow->dl_src', 'vlan' and
+ * 'in_port->port_idx'. If that returns a nonzero tag, logs the learned
+ * association at debug level (rate-limited) and passes the tag to
+ * ofproto_revalidate() for 'br->ofproto'. */
+static void
+update_learning_table(struct bridge *br, const flow_t *flow, int vlan,
+ struct port *in_port)
+{
+ tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
+ vlan, in_port->port_idx);
+ if (rev_tag) {
+ /* The log messages here could actually be useful in debugging,
+ * so keep the rate limit relatively high. */
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
+ 300);
+ VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
+ "on port %s in VLAN %d",
+ br->name, ETH_ADDR_ARGS(flow->dl_src),
+ in_port->name, vlan);
+ ofproto_revalidate(br->ofproto, rev_tag);
+ }
+}
+
static bool
-is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *packet)
+is_bcast_arp_reply(const flow_t *flow) /* Returns true if 'flow' has the ARP
+ * Ethernet type, an 'nw_proto' equal to ARP_OP_REPLY, and a broadcast
+ * Ethernet destination. Only fields of 'flow' are examined. */
{
- struct arp_eth_header *arp = (struct arp_eth_header *) packet->data;
return (flow->dl_type == htons(ETH_TYPE_ARP)
- && eth_addr_is_broadcast(flow->dl_dst)
- && packet->size >= sizeof(struct arp_eth_header)
- && arp->ar_op == ARP_OP_REQUEST);
+ && flow->nw_proto == ARP_OP_REPLY /* NOTE(review): presumably 'nw_proto'
+ * holds the ARP opcode in host byte order for ARP flows -- verify
+ * against the flow extraction code. */
+ && eth_addr_is_broadcast(flow->dl_dst));
}
/* If the composed actions may be applied to any packet in the given 'flow',
static bool
process_flow(struct bridge *br, const flow_t *flow,
const struct ofpbuf *packet, struct odp_actions *actions,
- tag_type *tags)
+ tag_type *tags, uint16_t *nf_output_iface)
{
struct iface *in_iface;
struct port *in_port;
struct port *out_port = NULL; /* By default, drop the packet/flow. */
int vlan;
+ int out_port_idx;
/* Find the interface and port structure for the received packet. */
in_iface = iface_from_dp_ifidx(br, flow->in_port);
return true;
}
in_port = in_iface->port;
-
- /* Figure out what VLAN this packet belongs to.
- *
- * Note that dl_vlan of 0 and of OFP_VLAN_NONE both mean that the packet
- * belongs to VLAN 0, so we should treat both cases identically. (In the
- * former case, the packet has an 802.1Q header that specifies VLAN 0,
- * presumably to allow a priority to be specified. In the latter case, the
- * packet does not have any 802.1Q header.) */
- vlan = ntohs(flow->dl_vlan);
- if (vlan == OFP_VLAN_NONE) {
- vlan = 0;
- }
- if (in_port->vlan >= 0) {
- if (vlan) {
- /* XXX support double tagging? */
- if (packet != NULL) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %"PRIu16" tagged "
- "packet received on port %s configured with "
- "implicit VLAN %"PRIu16,
- br->name, ntohs(flow->dl_vlan),
- in_port->name, in_port->vlan);
- }
- goto done;
- }
- vlan = in_port->vlan;
- } else {
- if (!port_includes_vlan(in_port, vlan)) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_WARN_RL(&rl, "bridge %s: dropping VLAN %d tagged "
- "packet received on port %s not configured for "
- "trunking VLAN %d",
- br->name, vlan, in_port->name, vlan);
- goto done;
- }
+ vlan = flow_get_vlan(br, flow, in_port, !!packet);
+ if (vlan < 0) {
+ goto done;
}
/* Drop frames for ports that STP wants entirely killed (both for
goto done;
}
- /* Multicast (and broadcast) packets on bonds need special attention, to
- * avoid receiving duplicates. */
- if (in_port->n_ifaces > 1 && eth_addr_is_multicast(flow->dl_dst)) {
- *tags |= in_port->active_iface_tag;
- if (in_port->active_iface != in_iface->port_ifidx) {
- /* Drop all multicast packets on inactive slaves. */
- goto done;
- } else {
- /* Drop all multicast packets for which we have learned a different
- * input port, because we probably sent the packet on one slaves
- * and got it back on the active slave. Broadcast ARP replies are
- * an exception to this rule: the host has moved to another
- * switch. */
- int src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
- if (src_idx != -1 && src_idx != in_port->port_idx) {
- if (packet) {
- if (!is_bcast_arp_reply(flow, packet)) {
- goto done;
- }
- } else {
- /* No way to know whether it's an ARP reply, because the
- * flow entry doesn't include enough information and we
- * don't have a packet. Punt. */
- return false;
- }
+ /* Packets received on bonds need special attention to avoid duplicates. */
+ if (in_port->n_ifaces > 1) {
+ int src_idx;
+
+ if (eth_addr_is_multicast(flow->dl_dst)) {
+ *tags |= in_port->active_iface_tag;
+ if (in_port->active_iface != in_iface->port_ifidx) {
+ /* Drop all multicast packets on inactive slaves. */
+ goto done;
}
}
+
+ /* Drop all packets for which we have learned a different input
+ * port, because we probably sent the packet on one slave and got
+ * it back on the other. Broadcast ARP replies are an exception
+ * to this rule: the host has moved to another switch. */
+ src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
+ if (src_idx != -1 && src_idx != in_port->port_idx &&
+ !is_bcast_arp_reply(flow)) {
+ goto done;
+ }
}
/* MAC learning. */
out_port = FLOOD_PORT;
- if (br->ml) {
- int out_port_idx;
-
- /* Learn source MAC (but don't try to learn from revalidation). */
- if (packet) {
- tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src,
- vlan, in_port->port_idx);
- if (rev_tag) {
- /* The log messages here could actually be useful in debugging,
- * so keep the rate limit relatively high. */
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30,
- 300);
- VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
- "on port %s in VLAN %d",
- br->name, ETH_ADDR_ARGS(flow->dl_src),
- in_port->name, vlan);
- ofproto_revalidate(br->ofproto, rev_tag);
- }
- }
-
- /* Determine output port. */
- out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
- tags);
- if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
- out_port = br->ports[out_port_idx];
- }
+ /* Learn source MAC (but don't try to learn from revalidation). */
+ if (packet) {
+ update_learning_table(br, flow, vlan, in_port);
+ }
+
+ /* Determine output port. */
+ out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan,
+ tags);
+ if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
+ out_port = br->ports[out_port_idx];
+ } else if (!packet && !eth_addr_is_multicast(flow->dl_dst)) {
+ /* If we are revalidating but don't have a learning entry then
+ * eject the flow. Installing a flow that floods packets opens
+ * up a window of time where we could learn from a packet reflected
+ * on a bond and blackhole packets before the learning table is
+ * updated to reflect the correct port. */
+ return false;
}
/* Don't send packets out their input ports. Don't forward frames that STP
}
done:
- compose_actions(br, flow, vlan, in_port, out_port, tags, actions);
+ compose_actions(br, flow, vlan, in_port, out_port, tags, actions,
+ nf_output_iface);
- /*
- * We send out only a single packet, instead of setting up a flow, if the
- * packet is an ARP directed to broadcast that arrived on a bonded
- * interface. In such a situation ARP requests and replies must be handled
- * differently, but OpenFlow unfortunately can't distinguish them.
- */
- return (in_port->n_ifaces < 2
- || flow->dl_type != htons(ETH_TYPE_ARP)
- || !eth_addr_is_broadcast(flow->dl_dst));
+ return true;
}
/* Careful: 'opp' is in host byte order and opp->port_no is an OFP port
static bool
bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet,
- struct odp_actions *actions, tag_type *tags, void *br_)
+ struct odp_actions *actions, tag_type *tags,
+ uint16_t *nf_output_iface, void *br_)
{
struct bridge *br = br_;
#endif
COVERAGE_INC(bridge_process_flow);
- return process_flow(br, flow, packet, actions, tags);
+ return process_flow(br, flow, packet, actions, tags, nf_output_iface);
}
static void
void *br_)
{
struct bridge *br = br_;
+ struct port *in_port;
const union odp_action *a;
+ /* Feed information from the active flows back into the learning table
+ * to ensure that table is always in sync with what is actually flowing
+ * through the datapath. */
+ in_port = port_from_dp_ifidx(br, flow->in_port);
+ if (in_port) {
+ int vlan = flow_get_vlan(br, flow, in_port, false);
+ if (vlan >= 0) {
+ update_learning_table(br, flow, vlan, in_port);
+ }
+ }
+
if (!br->has_bonded_ports) {
return;
}
for (a = actions; a < &actions[n_actions]; a++) {
if (a->type == ODPAT_OUTPUT) {
- struct port *port = port_from_dp_ifidx(br, a->output.port);
- if (port && port->n_ifaces >= 2) {
- struct bond_entry *e = lookup_bond_entry(port, flow->dl_src);
+ struct port *out_port = port_from_dp_ifidx(br, a->output.port);
+ if (out_port && out_port->n_ifaces >= 2) {
+ struct bond_entry *e = lookup_bond_entry(out_port,
+ flow->dl_src);
e->tx_bytes += n_bytes;
}
}
/* Shifts 'hash' from 'from' to 'to' within 'port'. */
static void
bond_shift_load(struct slave_balance *from, struct slave_balance *to,
- struct bond_entry *hash)
+ int hash_idx)
{
+ struct bond_entry *hash = from->hashes[hash_idx];
struct port *port = from->iface->port;
uint64_t delta = hash->tx_bytes;
* it require more work, the only purpose it would be to allow that hash to
* be migrated to another slave in this rebalancing run, and there is no
* point in doing that. */
- if (from->hashes[0] == hash) {
+ if (hash_idx == 0) {
from->hashes++;
} else {
- int i = hash - from->hashes[0];
- memmove(from->hashes + i, from->hashes + i + 1,
- (from->n_hashes - (i + 1)) * sizeof *from->hashes);
+ memmove(from->hashes + hash_idx, from->hashes + hash_idx + 1,
+ (from->n_hashes - (hash_idx + 1)) * sizeof *from->hashes);
}
from->n_hashes--;
/* 'from' is carrying significantly more load than 'to', and that
* load is split across at least two different hashes. Pick a hash
* to migrate to 'to' (the least-loaded slave), given that doing so
- * must not cause 'to''s load to exceed 'from''s load.
+ * must decrease the ratio of the load on the two slaves by at
+ * least 0.1.
*
* The sort order we use means that we prefer to shift away the
* smallest hashes instead of the biggest ones. There is little
* reason behind this decision; we could use the opposite sort
* order to shift away big hashes ahead of small ones. */
size_t i;
+ bool order_swapped;
for (i = 0; i < from->n_hashes; i++) {
+ double old_ratio, new_ratio;
uint64_t delta = from->hashes[i]->tx_bytes;
- if (to->tx_bytes + delta < from->tx_bytes - delta) {
+
+ if (delta == 0 || from->tx_bytes - delta == 0) {
+ /* Pointless move. */
+ continue;
+ }
+
+ order_swapped = from->tx_bytes - delta < to->tx_bytes + delta;
+
+ if (to->tx_bytes == 0) {
+ /* Nothing on the new slave, move it. */
+ break;
+ }
+
+ old_ratio = (double)from->tx_bytes / to->tx_bytes;
+ new_ratio = (double)(from->tx_bytes - delta) /
+ (to->tx_bytes + delta);
+
+ if (new_ratio == 0) {
+ /* Should already be covered but check to prevent division
+ * by zero. */
+ continue;
+ }
+
+ if (new_ratio < 1) {
+ new_ratio = 1 / new_ratio;
+ }
+
+ if (old_ratio - new_ratio > 0.1) {
+ /* Would decrease the ratio, move it. */
break;
}
}
if (i < from->n_hashes) {
- bond_shift_load(from, to, from->hashes[i]);
+ bond_shift_load(from, to, i);
+ port->bond_compat_is_stale = true;
+
+ /* If the result of the migration changed the relative order of
+ * 'from' and 'to' swap them back to maintain invariants. */
+ if (order_swapped) {
+ swap_bals(from, to);
+ }
/* Re-sort 'bals'. Note that this may make 'from' and 'to'
* point to different slave_balance structures. It is only
struct ofpbuf packet;
int error, n_packets, n_errors;
- if (!port->n_ifaces || port->active_iface < 0 || !br->ml) {
+ if (!port->n_ifaces || port->active_iface < 0) {
return;
}
ofpbuf_init(&packet, 128);
error = n_packets = n_errors = 0;
LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
- static const char s[] = "Open vSwitch Bond Failover";
union ofp_action actions[2], *a;
- struct eth_header *eth;
- struct llc_snap_header *llc_snap;
uint16_t dp_ifidx;
tag_type tags = 0;
flow_t flow;
continue;
}
- /* Compose packet to send. */
- ofpbuf_clear(&packet);
- eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
- llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
- ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */
- ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN);
-
- memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
- memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN);
- eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
-
- llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
- llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
- llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
- memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
- llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
-
/* Compose actions. */
memset(actions, 0, sizeof actions);
a = actions;
/* Send packet. */
n_packets++;
+ compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
+ e->mac);
flow_extract(&packet, ODPP_NONE, &flow);
retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
&packet);
continue;
}
- ds_put_format(&ds, "\thash %d: %lld kB load\n",
+ ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n",
hash, be->tx_bytes / 1024);
/* MACs. */
- if (!port->bridge->ml) {
- break;
- }
-
LIST_FOR_EACH (me, struct mac_entry, lru_node,
&port->bridge->ml->lrus) {
uint16_t dp_ifidx;
ofproto_revalidate(port->bridge->ofproto, entry->iface_tag);
entry->iface_idx = iface->port_ifidx;
entry->iface_tag = tag_create_random();
+ port->bond_compat_is_stale = true;
unixctl_command_reply(conn, 200, "migrated");
}
enable_slave(conn, args, false);
}
+/* Handler registered for the "bond/hash" unixctl command.
+ *
+ * Scans 'args' as an Ethernet address.  On success, replies on 'conn' with
+ * status 200 and the decimal value that bond_hash() produces for that
+ * address; otherwise replies with status 501 and "invalid mac". */
+static void
+bond_unixctl_hash(struct unixctl_conn *conn, const char *args)
+{
+    uint8_t ea[ETH_ADDR_LEN];
+    uint8_t hash;
+    char *reply;
+
+    /* Reject anything that does not scan as a complete MAC up front. */
+    if (sscanf(args, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))
+        != ETH_ADDR_SCAN_COUNT) {
+        unixctl_command_reply(conn, 501, "invalid mac");
+        return;
+    }
+
+    hash = bond_hash(ea);
+
+    /* The reply text is heap-allocated by xasprintf(), so release it once
+     * it has been handed to the connection. */
+    reply = xasprintf("%u", hash);
+    unixctl_command_reply(conn, 200, reply);
+    free(reply);
+}
+
static void
bond_init(void)
{
bond_unixctl_set_active_slave);
unixctl_command_register("bond/enable-slave", bond_unixctl_enable_slave);
unixctl_command_register("bond/disable-slave", bond_unixctl_disable_slave);
+ unixctl_command_register("bond/hash", bond_unixctl_hash);
}
\f
/* Port functions. */
size_t i;
proc_net_compat_update_vlan(port->name, NULL, 0);
+ proc_net_compat_update_bond(port->name, NULL);
for (i = 0; i < MAX_MIRRORS; i++) {
struct mirror *m = br->mirrors[i];
if (port->bond_hash) {
free(port->bond_hash);
port->bond_hash = NULL;
- proc_net_compat_update_bond(port->name, NULL);
+ port->bond_compat_is_stale = true;
}
} else {
if (!port->bond_hash) {
port->no_ifaces_tag = tag_create_random();
bond_choose_active_iface(port);
}
- port_update_bond_compat(port);
+ port->bond_compat_is_stale = true;
}
}
/* Publishes the current bonding state of 'port' through
 * proc_net_compat_update_bond().
 *
 * A port with fewer than two interfaces is not reported as a bond: its entry
 * is cleared by passing NULL and the function returns.  Otherwise a
 * 'struct compat_bond' is filled in with the port's updelay and downdelay,
 * one hash entry for every bond_hash slot whose iface_idx refers to an
 * existing interface, and one slave entry (name, up flag, MAC) per interface.
 * The bond as a whole is marked up if at least one slave is up.
 *
 * When "bonding.<port>.fake-iface" is set, the NETDEV_UP flag of the network
 * device named after the port is also switched on or off to match the bond's
 * up state.
 *
 * The hash array lives on the stack; the slave array is heap-allocated and
 * freed before returning. */
static void
port_update_bond_compat(struct port *port)
{
+ struct compat_bond_hash compat_hashes[BOND_MASK + 1];
struct compat_bond bond;
size_t i;
if (port->n_ifaces < 2) {
+ proc_net_compat_update_bond(port->name, NULL);
return;
}
bond.up = false;
bond.updelay = port->updelay;
bond.downdelay = port->downdelay;
+
+ bond.n_hashes = 0;
+ bond.hashes = compat_hashes;
+ if (port->bond_hash) {
+ const struct bond_entry *e;
+ for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) {
+ if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) {
+ struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++];
+ cbh->hash = e - port->bond_hash;
+ cbh->netdev_name = port->ifaces[e->iface_idx]->name;
+ }
+ }
+ }
+
bond.n_slaves = port->n_ifaces;
bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves);
for (i = 0; i < port->n_ifaces; i++) {
struct iface *iface = port->ifaces[i];
struct compat_bond_slave *slave = &bond.slaves[i];
slave->name = iface->name;
- slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) ||
- (!iface->enabled && iface->delay_expires != LLONG_MAX));
+
+ /* We need to make the same determination as the Linux bonding
+ * code to determine whether a slave should be considered "up".
+ * The Linux function bond_miimon_inspect() supports four
+ * BOND_LINK_* states:
+ *
+ * - BOND_LINK_UP: carrier detected, updelay has passed.
+ * - BOND_LINK_FAIL: carrier lost, downdelay in progress.
+ * - BOND_LINK_DOWN: carrier lost, downdelay has passed.
+ * - BOND_LINK_BACK: carrier detected, updelay in progress.
+ *
+ * The function bond_info_show_slave() only considers BOND_LINK_UP
+ * to be "up" and anything else to be "down".
+ */
+ slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
if (slave->up) {
bond.up = true;
}
/* NOTE(review): the return value of netdev_get_etheraddr() is not checked
 * here, so if it can fail without writing 'slave->mac' the freshly
 * xmalloc()'d bytes would be reported as-is -- confirm. */
netdev_get_etheraddr(iface->netdev, slave->mac);
}
+
+ if (cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+ struct netdev *bond_netdev;
+
+ if (!netdev_open(port->name, NETDEV_ETH_TYPE_NONE, &bond_netdev)) {
+ if (bond.up) {
+ netdev_turn_flags_on(bond_netdev, NETDEV_UP, true);
+ } else {
+ netdev_turn_flags_off(bond_netdev, NETDEV_UP, true);
+ }
+ netdev_close(bond_netdev);
+ }
+ }
+
proc_net_compat_update_bond(port->name, &bond);
free(bond.slaves);
}
{
return port_array_get(&br->ifaces, dp_ifidx);
}
+
+/* Reports whether the interface named 'iface' is an "internal" interface of
+ * bridge 'br', i.e. one that is simulated entirely inside the datapath.
+ *
+ * Three cases count as internal:
+ *
+ *   - 'iface' has the same name as the bridge itself (the local port,
+ *     ODPP_LOCAL, is always internal).
+ *
+ *   - The configuration sets "iface.<iface>.internal = true".
+ *
+ *   - The configuration sets "bonding.<bondname>.fake-iface = true" and
+ *     'iface' names a port on 'br' that has more than one interface.  This is
+ *     a kluge that creates an internal port carrying the bond's name; it
+ *     should go away in the long term.  Such fake interfaces have no
+ *     struct iface, which is one reason this function takes a name rather
+ *     than a struct iface. */
+static bool
+iface_is_internal(const struct bridge *br, const char *iface)
+{
+    struct port *bond;
+
+    if (!strcmp(iface, br->name)) {
+        return true;
+    }
+    if (cfg_get_bool(0, "iface.%s.internal", iface)) {
+        return true;
+    }
+
+    /* Only the fake-iface kluge can make it internal from here on. */
+    if (!cfg_get_bool(0, "bonding.%s.fake-iface", iface)) {
+        return false;
+    }
+    bond = port_lookup(br, iface);
+    return bond && bond->n_ifaces > 1;
+}
+
+/* Sets the Ethernet address of 'iface' to the value of "iface.<name>.mac", if
+ * the configuration file specifies one (a zero result from cfg_get_mac() is
+ * treated as "not specified").
+ *
+ * The request is refused, with an error logged, when the address is
+ * multicast or when 'iface' is the local port (ODPP_LOCAL); in the latter
+ * case the message points the user at "bridge.<name>.mac" instead.  A
+ * failure reported by netdev_set_etheraddr() is logged and not propagated. */
+static void
+iface_set_mac(struct iface *iface)
+{
+    uint64_t mac = cfg_get_mac(0, "iface.%s.mac", iface->name);
+    if (mac) {
+        /* Plain automatic storage: 'ea' is filled in by
+         * eth_addr_from_uint64() before it is read, so keeping it in a
+         * 'static' buffer only made this function needlessly
+         * non-reentrant. */
+        uint8_t ea[ETH_ADDR_LEN];
+
+        eth_addr_from_uint64(mac, ea);
+        if (eth_addr_is_multicast(ea)) {
+            VLOG_ERR("interface %s: cannot set MAC to multicast address",
+                     iface->name);
+        } else if (iface->dp_ifidx == ODPP_LOCAL) {
+            VLOG_ERR("ignoring iface.%s.mac; use bridge.%s.mac instead",
+                     iface->name, iface->name);
+        } else {
+            int error = netdev_set_etheraddr(iface->netdev, ea);
+            if (error) {
+                VLOG_ERR("interface %s: setting MAC failed (%s)",
+                         iface->name, strerror(error));
+            }
+        }
+    }
+}
\f
/* Port mirroring. */
mirror_reconfigure(struct bridge *br)
{
struct svec old_mirrors, new_mirrors;
- size_t i;
+ size_t i, n_rspan_vlans;
+ unsigned long *rspan_vlans;
/* Collect old and new mirrors. */
svec_init(&old_mirrors);
m->out_port->is_mirror_output_port = true;
}
}
+
+ /* Update learning disabled vlans (for RSPAN). */
+ rspan_vlans = NULL;
+ n_rspan_vlans = cfg_count("vlan.%s.disable-learning", br->name);
+ if (n_rspan_vlans) {
+ rspan_vlans = bitmap_allocate(4096);
+
+ for (i = 0; i < n_rspan_vlans; i++) {
+ int vlan = cfg_get_vlan(i, "vlan.%s.disable-learning", br->name);
+ if (vlan >= 0) {
+ bitmap_set1(rspan_vlans, vlan);
+ VLOG_INFO("bridge %s: disabling learning on vlan %d\n",
+ br->name, vlan);
+ } else {
+ VLOG_ERR("bridge %s: invalid value '%s' for learning disabled "
+ "VLAN", br->name,
+ cfg_get_string(i, "vlan.%s.disable-learning", br->name));
+ }
+ }
+ }
+ if (mac_learning_set_disabled_vlans(br->ml, rspan_vlans)) {
+ bridge_flush(br);
+ }
}
static void
int *vlans;
size_t i;
bool mirror_all_ports;
+ bool any_ports_specified;
/* Get output port. */
out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port",
cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx);
cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx);
cfg_get_all_keys(&ports, "%s.select.port", pfx);
+ any_ports_specified = src_ports.n || dst_ports.n || ports.n;
svec_append(&src_ports, &ports);
svec_append(&dst_ports, &ports);
svec_destroy(&ports);
prune_ports(m, &src_ports);
prune_ports(m, &dst_ports);
+ if (any_ports_specified && !src_ports.n && !dst_ports.n) {
+ VLOG_ERR("%s: none of the specified ports exist; "
+ "disabling port mirror %s", pfx, pfx);
+ mirror_destroy(m);
+ goto exit;
+ }
/* Get all the vlans, and drop duplicate and invalid vlans. */
svec_init(&vlan_strings);
}
/* Clean up. */
+exit:
svec_destroy(&src_ports);
svec_destroy(&dst_ports);
free(pfx);