tag_type active_iface_tag; /* Tag for bcast flows. */
tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */
int updelay, downdelay; /* Delay before iface goes up/down, in ms. */
+ bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */
/* Port mirroring info. */
mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */
static struct bridge *bridge_create(const char *name);
static void bridge_destroy(struct bridge *);
static struct bridge *bridge_lookup(const char *name);
+static void bridge_unixctl_dump_flows(struct unixctl_conn *, const char *);
static int bridge_run_one(struct bridge *);
static void bridge_reconfigure_one(struct bridge *);
static void bridge_reconfigure_controller(struct bridge *);
const char *devname);
static uint64_t dpid_from_hash(const void *, size_t nbytes);
+static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args);
+
static void bond_init(void);
static void bond_run(struct bridge *);
static void bond_wait(struct bridge *);
static struct iface *iface_lookup(const struct bridge *, const char *name);
static struct iface *iface_from_dp_ifidx(const struct bridge *,
uint16_t dp_ifidx);
+static bool iface_is_internal(const struct bridge *, const char *name);
+static void iface_set_mac(struct iface *);
/* Hooks into ofproto processing. */
static struct ofhooks bridge_ofhooks;
bond_init();
+ unixctl_command_register("fdb/show", bridge_unixctl_fdb_show);
+
for (i = 0; i < DP_MAX; i++) {
struct dpif dpif;
char devname[16];
}
}
+ unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows);
+
bridge_reconfigure();
}
for (i = 0; i < add_ifaces.n; i++) {
const char *if_name = add_ifaces.names[i];
for (;;) {
- int internal = cfg_get_bool(0, "iface.%s.internal", if_name);
- int error = dpif_port_add(&br->dpif, if_name, next_port_no++,
- internal ? ODP_PORT_INTERNAL : 0);
+ bool internal;
+ int error;
+
+ /* Add to datapath. */
+ internal = iface_is_internal(br, if_name);
+ error = dpif_port_add(&br->dpif, if_name, next_port_no++,
+ internal ? ODP_PORT_INTERNAL : 0);
if (error != EEXIST) {
if (next_port_no >= 256) {
VLOG_ERR("ran out of valid port numbers on dp%u",
VLOG_ERR("bridge %s: problem setting netflow collectors",
br->name);
}
+ svec_destroy(&nf_hosts);
/* Update the controller and related settings. It would be more
* straightforward to call this from bridge_reconfigure_one(), but we
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
+
port_update_vlan_compat(port);
+
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *iface = port->ifaces[j];
+ if (iface->dp_ifidx != ODPP_LOCAL
+ && iface_is_internal(br, iface->name)) {
+ iface_set_mac(iface);
+ }
+ }
}
}
LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
memset(ea, 0xff, sizeof ea);
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
+ uint8_t iface_ea[ETH_ADDR_LEN];
+ uint64_t iface_ea_u64;
+ struct iface *iface;
+
+ /* Mirror output ports don't participate. */
if (port->is_mirror_output_port) {
continue;
}
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- uint8_t iface_ea[ETH_ADDR_LEN];
+
+ /* Choose the MAC address to represent the port. */
+ iface_ea_u64 = cfg_get_mac(0, "port.%s.mac", port->name);
+ if (iface_ea_u64) {
+ /* User specified explicitly. */
+ eth_addr_from_uint64(iface_ea_u64, iface_ea);
+
+ /* Find the interface with this Ethernet address (if any) so that
+ * we can provide the correct devname to the caller. */
+ iface = NULL;
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *candidate = port->ifaces[j];
+ uint8_t candidate_ea[ETH_ADDR_LEN];
+ if (!netdev_nodev_get_etheraddr(candidate->name, candidate_ea)
+ && eth_addr_equals(iface_ea, candidate_ea)) {
+ iface = candidate;
+ }
+ }
+ } else {
+ /* Choose the interface whose MAC address will represent the port.
+ * The Linux kernel bonding code always chooses the MAC address of
+ * the first slave added to a bond, and the Fedora networking
+ * scripts always add slaves to a bond in alphabetical order, so
+ * for compatibility we choose the interface with the name that is
+ * first in alphabetical order. */
+ iface = port->ifaces[0];
+ for (j = 1; j < port->n_ifaces; j++) {
+ struct iface *candidate = port->ifaces[j];
+ if (strcmp(candidate->name, iface->name) < 0) {
+ iface = candidate;
+ }
+ }
+
+ /* The local port doesn't count (since we're trying to choose its
+ * MAC address anyway). Other internal ports don't count because
+ * we really want a physical MAC if we can get it, and internal
+ * ports typically have randomly generated MACs. */
if (iface->dp_ifidx == ODPP_LOCAL
|| cfg_get_bool(0, "iface.%s.internal", iface->name)) {
continue;
}
+
+ /* Grab MAC. */
error = netdev_nodev_get_etheraddr(iface->name, iface_ea);
- if (!error) {
- if (!eth_addr_is_multicast(iface_ea) &&
- !eth_addr_is_reserved(iface_ea) &&
- !eth_addr_is_zero(iface_ea) &&
- memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0) {
- memcpy(ea, iface_ea, ETH_ADDR_LEN);
- *devname = iface->name;
- }
- } else {
+ if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "failed to obtain Ethernet address of %s: %s",
iface->name, strerror(error));
+ continue;
}
}
+
+ /* Compare against our current choice. */
+ if (!eth_addr_is_multicast(iface_ea) &&
+ !eth_addr_is_reserved(iface_ea) &&
+ !eth_addr_is_zero(iface_ea) &&
+ memcmp(iface_ea, ea, ETH_ADDR_LEN) < 0)
+ {
+ memcpy(ea, iface_ea, ETH_ADDR_LEN);
+ *devname = iface ? iface->name : NULL;
+ }
}
if (eth_addr_is_multicast(ea) || eth_addr_is_vif(ea)) {
memcpy(ea, br->default_ea, ETH_ADDR_LEN);
}
}
\f
+/* Bridge unixctl user interface functions. */
+/* Handler for the "fdb/show" unixctl command.
+ *
+ * Looks up the bridge named by 'args' and replies on 'conn' with a table of
+ * its MAC learning entries: for each entry, the datapath index of the first
+ * interface on the learned port, the VLAN, the MAC address, and the entry's
+ * age.  Replies with a 501 error if no bridge has that name. */
+static void
+bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
+{
+    const struct bridge *br = bridge_lookup(args);
+    struct ds table = DS_EMPTY_INITIALIZER;
+
+    if (!br) {
+        unixctl_command_reply(conn, 501, "no such bridge");
+        return;
+    }
+
+    ds_put_cstr(&table, " port VLAN MAC Age\n");
+    if (br->ml) {
+        const struct mac_entry *entry;
+
+        LIST_FOR_EACH (entry, struct mac_entry, lru_node, &br->ml->lrus) {
+            /* Only print entries whose port index is within the bridge's
+             * current port array. */
+            if (entry->port >= 0 && entry->port < br->n_ports) {
+                ds_put_format(&table, "%5d %4d "ETH_ADDR_FMT" %3d\n",
+                              br->ports[entry->port]->ifaces[0]->dp_ifidx,
+                              entry->vlan, ETH_ADDR_ARGS(entry->mac),
+                              mac_entry_age(entry));
+            }
+        }
+    }
+    unixctl_command_reply(conn, 200, ds_cstr(&table));
+    ds_destroy(&table);
+}
+\f
/* Bridge reconfiguration functions. */
static struct bridge *
return br ? ofproto_get_datapath_id(br->ofproto) : 0;
}
+/* Handler for the "bridge/dump-flows" unixctl command.
+ *
+ * Replies on 'conn' with the listing that ofproto_get_all_flows() produces
+ * for the bridge named by 'args', that is, all flows known by the OpenFlow
+ * stack, including those normally hidden.  Replies with a 501 error if no
+ * bridge has that name. */
+static void
+bridge_unixctl_dump_flows(struct unixctl_conn *conn, const char *args)
+{
+    struct bridge *bridge = bridge_lookup(args);
+
+    if (bridge) {
+        struct ds flows;
+
+        ds_init(&flows);
+        ofproto_get_all_flows(bridge->ofproto, &flows);
+
+        unixctl_command_reply(conn, 200, ds_cstr(&flows));
+        ds_destroy(&flows);
+    } else {
+        unixctl_command_reply(conn, 501, "Unknown bridge");
+    }
+}
+
static int
bridge_run_one(struct bridge *br)
{
int rate_limit, burst_limit;
if (!strcmp(controller, "discover")) {
+ bool update_resolv_conf = true;
+
+ if (cfg_has("%s.update-resolv.conf", pfx)) {
+ update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf",
+ pfx);
+ }
ofproto_set_discovery(br->ofproto, true,
cfg_get_string(0, "%s.accept-regex", pfx),
- cfg_get_bool(0, "%s.update-resolv.conf",
- pfx));
+ update_resolv_conf);
} else {
struct netdev *netdev;
bool in_band;
|| !strcmp(fail_mode, "open")));
probe = cfg_get_int(0, "%s.inactivity-probe", pfx);
- ofproto_set_probe_interval(br->ofproto,
- probe ? probe : cfg_get_int(0, "mgmt.inactivity-probe"));
+ if (probe < 5) {
+ probe = cfg_get_int(0, "mgmt.inactivity-probe");
+ if (probe < 5) {
+ probe = 5;
+ }
+ }
+ ofproto_set_probe_interval(br->ofproto, probe);
max_backoff = cfg_get_int(0, "%s.max-backoff", pfx);
if (!max_backoff) {
max_backoff = cfg_get_int(0, "mgmt.max-backoff");
if (!max_backoff) {
- max_backoff = 15;
+ max_backoff = 8;
}
}
ofproto_set_max_backoff(br->ofproto, max_backoff);
struct iface *iface = port->ifaces[j];
svec_add(ifaces, iface->name);
}
+ if (port->n_ifaces > 1
+ && cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+ svec_add(ifaces, port->name);
+ }
}
- svec_sort(ifaces);
- assert(svec_is_unique(ifaces));
+ svec_sort_unique(ifaces);
}
/* For robustness, in case the administrator moves around datapath ports behind
return false;
}
e->iface_tag = tag_create_random();
+ ((struct port *) port)->bond_compat_is_stale = true;
}
*tags |= e->iface_tag;
iface = port->ifaces[e->iface_idx];
iface->delay_expires = LLONG_MAX;
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
iface->name, carrier ? "disabled" : "enabled");
+ } else if (carrier && port->updelay && port->active_iface < 0) {
+ iface->delay_expires = time_msec();
+ VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+ "other interface is up", iface->name, port->updelay);
} else {
int delay = carrier ? port->updelay : port->downdelay;
iface->delay_expires = time_msec() + delay;
iface->enabled = enable;
if (!iface->enabled) {
- VLOG_WARN("interface %s: enabled", iface->name);
+ VLOG_WARN("interface %s: disabled", iface->name);
ofproto_revalidate(br->ofproto, iface->tag);
if (iface->port_ifidx == port->active_iface) {
ofproto_revalidate(br->ofproto,
}
bond_send_learning_packets(port);
} else {
- VLOG_WARN("interface %s: disabled", iface->name);
+ VLOG_WARN("interface %s: enabled", iface->name);
if (port->active_iface < 0) {
ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
bond_choose_active_iface(port);
}
iface->tag = tag_create_random();
}
+ port_update_bond_compat(port);
}
static void
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
+
+ if (port->bond_compat_is_stale) {
+ port->bond_compat_is_stale = false;
+ port_update_bond_compat(port);
+ }
+
if (port->n_ifaces < 2) {
continue;
}
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port_includes_vlan(port, m->out_vlan)
- && set_dst(dst, flow, in_port, port, tags)
- && !dst_is_duplicate(dsts, dst - dsts, dst))
+ && set_dst(dst, flow, in_port, port, tags))
{
if (port->vlan < 0) {
dst->vlan = m->out_vlan;
}
- if (dst->dp_ifidx == flow->in_port
- && dst->vlan == vlan) {
+ if (dst_is_duplicate(dsts, dst - dsts, dst)) {
+ continue;
+ }
+ if (port == in_port && dst->vlan == vlan) {
/* Don't send out input port on same VLAN. */
continue;
}
goto done;
} else {
/* Drop all multicast packets for which we have learned a different
- * input port, because we probably sent the packet on one slaves
+ * input port, because we probably sent the packet on one slave
* and got it back on the active slave. Broadcast ARP replies are
* an exception to this rule: the host has moved to another
* switch. */
} else {
from++;
}
+ port->bond_compat_is_stale = true;
}
}
ofpbuf_init(&packet, 128);
error = n_packets = n_errors = 0;
LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
- static const char s[] = "Open vSwitch Bond Failover";
union ofp_action actions[2], *a;
- struct eth_header *eth;
- struct llc_snap_header *llc_snap;
uint16_t dp_ifidx;
tag_type tags = 0;
flow_t flow;
continue;
}
- /* Compose packet to send. */
- ofpbuf_clear(&packet);
- eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
- llc_snap = ofpbuf_put_zeros(&packet, LLC_SNAP_HEADER_LEN);
- ofpbuf_put(&packet, s, sizeof s); /* Includes null byte. */
- ofpbuf_put(&packet, e->mac, ETH_ADDR_LEN);
-
- memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
- memcpy(eth->eth_src, e->mac, ETH_ADDR_LEN);
- eth->eth_type = htons(packet.size - ETH_HEADER_LEN);
-
- llc_snap->llc.llc_dsap = LLC_DSAP_SNAP;
- llc_snap->llc.llc_ssap = LLC_SSAP_SNAP;
- llc_snap->llc.llc_cntl = LLC_CNTL_SNAP;
- memcpy(llc_snap->snap.snap_org, "\x00\x23\x20", 3);
- llc_snap->snap.snap_type = htons(0xf177); /* Random number. */
-
/* Compose actions. */
memset(actions, 0, sizeof actions);
a = actions;
/* Send packet. */
n_packets++;
+ compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
+ e->mac);
flow_extract(&packet, ODPP_NONE, &flow);
retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
&packet);
return;
}
- if (sscanf(hash_s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) {
+ if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ == ETH_ADDR_SCAN_COUNT) {
hash = bond_hash(mac);
} else if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
hash = atoi(hash_s) & BOND_MASK;
ofproto_revalidate(port->bridge->ofproto, entry->iface_tag);
entry->iface_idx = iface->port_ifidx;
entry->iface_tag = tag_create_random();
+ port->bond_compat_is_stale = true;
unixctl_command_reply(conn, 200, "migrated");
}
size_t i;
proc_net_compat_update_vlan(port->name, NULL, 0);
+ proc_net_compat_update_bond(port->name, NULL);
for (i = 0; i < MAX_MIRRORS; i++) {
struct mirror *m = br->mirrors[i];
if (port->bond_hash) {
free(port->bond_hash);
port->bond_hash = NULL;
- proc_net_compat_update_bond(port->name, NULL);
+ port->bond_compat_is_stale = true;
}
} else {
if (!port->bond_hash) {
port->no_ifaces_tag = tag_create_random();
bond_choose_active_iface(port);
}
- port_update_bond_compat(port);
+ port->bond_compat_is_stale = true;
}
}
static void
port_update_bond_compat(struct port *port)
{
+ struct compat_bond_hash compat_hashes[BOND_MASK + 1];
struct compat_bond bond;
size_t i;
if (port->n_ifaces < 2) {
+ proc_net_compat_update_bond(port->name, NULL);
return;
}
bond.up = false;
bond.updelay = port->updelay;
bond.downdelay = port->downdelay;
+
+ bond.n_hashes = 0;
+ bond.hashes = compat_hashes;
+ if (port->bond_hash) {
+ const struct bond_entry *e;
+ for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) {
+ if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) {
+ struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++];
+ cbh->hash = e - port->bond_hash;
+ cbh->netdev_name = port->ifaces[e->iface_idx]->name;
+ }
+ }
+ }
+
bond.n_slaves = port->n_ifaces;
bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves);
for (i = 0; i < port->n_ifaces; i++) {
struct iface *iface = port->ifaces[i];
struct compat_bond_slave *slave = &bond.slaves[i];
slave->name = iface->name;
- slave->up = ((iface->enabled && iface->delay_expires == LLONG_MAX) ||
- (!iface->enabled && iface->delay_expires != LLONG_MAX));
+
+ /* We need to make the same determination as the Linux bonding
+ * code to determine whether a slave should be considered "up".
+ * The Linux function bond_miimon_inspect() supports four
+ * BOND_LINK_* states:
+ *
+ * - BOND_LINK_UP: carrier detected, updelay has passed.
+ * - BOND_LINK_FAIL: carrier lost, downdelay in progress.
+ * - BOND_LINK_DOWN: carrier lost, downdelay has passed.
+ * - BOND_LINK_BACK: carrier detected, updelay in progress.
+ *
+ * The function bond_info_show_slave() only considers BOND_LINK_UP
+ * to be "up" and anything else to be "down".
+ */
+ slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
if (slave->up) {
bond.up = true;
}
memcpy(slave->mac, iface->mac, ETH_ADDR_LEN);
}
+
+ if (cfg_get_bool(0, "bonding.%s.fake-iface", port->name)) {
+ struct netdev *bond_netdev;
+
+ if (!netdev_open(port->name, NETDEV_ETH_TYPE_NONE, &bond_netdev)) {
+ if (bond.up) {
+ netdev_turn_flags_on(bond_netdev, NETDEV_UP, true);
+ } else {
+ netdev_turn_flags_off(bond_netdev, NETDEV_UP, true);
+ }
+ netdev_close(bond_netdev);
+ }
+ }
+
proc_net_compat_update_bond(port->name, &bond);
free(bond.slaves);
}
iface->tag = tag_create_random();
iface->delay_expires = LLONG_MAX;
- netdev_nodev_get_etheraddr(name, iface->mac);
- netdev_nodev_get_carrier(name, &iface->enabled);
+ if (!cfg_get_bool(0, "iface.%s.internal", iface->name)) {
+ netdev_nodev_get_etheraddr(name, iface->mac);
+ netdev_nodev_get_carrier(name, &iface->enabled);
+ } else {
+ /* Internal interfaces are created later by the call to dpif_port_add()
+ * in bridge_reconfigure(). Until then, we can't obtain any
+ * information about them. (There's no real value in doing so, anyway,
+ * because the 'mac' and 'enabled' values are only used for interfaces
+ * that are bond slaves, and it doesn't normally make sense to bond an
+ * internal interface.) */
+ }
if (port->n_ifaces >= port->allocated_ifaces) {
port->ifaces = x2nrealloc(port->ifaces, &port->allocated_ifaces,
{
return port_array_get(&br->ifaces, dp_ifidx);
}
+
+/* Reports whether 'iface' names an "internal" interface on bridge 'br', that
+ * is, an interface that is entirely simulated within the datapath.
+ *
+ * Three cases yield true:
+ *
+ *   - 'iface' has the same name as the bridge itself (the local port,
+ *     ODPP_LOCAL, which is always internal).
+ *
+ *   - The configuration sets "iface.<iface>.internal = true", which is how
+ *     other internal interfaces are created.
+ *
+ *   - The configuration sets "bonding.<iface>.fake-iface = true" and 'iface'
+ *     is the name of a port on 'br' that has more than one interface.  This
+ *     is a kluge-y feature that needs to go away in the long term.  It is
+ *     also one reason this function takes a name rather than a struct iface:
+ *     the fake interfaces created this way do not have a struct iface. */
+static bool
+iface_is_internal(const struct bridge *br, const char *iface)
+{
+    struct port *bond;
+
+    if (strcmp(iface, br->name) == 0) {
+        return true;
+    }
+    if (cfg_get_bool(0, "iface.%s.internal", iface)) {
+        return true;
+    }
+
+    if (!cfg_get_bool(0, "bonding.%s.fake-iface", iface)) {
+        return false;
+    }
+
+    /* A fake interface only exists for a port that really is a bond. */
+    bond = port_lookup(br, iface);
+    return bond != NULL && bond->n_ifaces > 1;
+}
+
+/* Sets the Ethernet address of 'iface' to the value of "iface.<name>.mac", if
+ * one is specified in the configuration file.
+ *
+ * Nothing is changed, and an error is logged, if the configured address is
+ * multicast or if 'iface' is the bridge's local port (whose address is
+ * configured through "bridge.<name>.mac" instead).  A failure to apply the
+ * address to the network device is also logged. */
+static void
+iface_set_mac(struct iface *iface)
+{
+    uint64_t mac = cfg_get_mac(0, "iface.%s.mac", iface->name);
+    /* Plain automatic storage: the buffer is refilled on every call, so there
+     * is no reason for it to be shared across calls. */
+    uint8_t ea[ETH_ADDR_LEN];
+
+    if (!mac) {
+        /* No MAC configured for this interface. */
+        return;
+    }
+
+    eth_addr_from_uint64(mac, ea);
+    if (eth_addr_is_multicast(ea)) {
+        VLOG_ERR("interface %s: cannot set MAC to multicast address",
+                 iface->name);
+    } else if (iface->dp_ifidx == ODPP_LOCAL) {
+        VLOG_ERR("ignoring iface.%s.mac; use bridge.%s.mac instead",
+                 iface->name, iface->name);
+    } else {
+        int error = netdev_nodev_set_etheraddr(iface->name, ea);
+        if (error) {
+            VLOG_ERR("interface %s: setting MAC failed (%s)",
+                     iface->name, strerror(error));
+        }
+    }
+}
\f
/* Port mirroring. */
int *vlans;
size_t i;
bool mirror_all_ports;
+ bool any_ports_specified;
/* Get output port. */
out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port",
cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx);
cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx);
cfg_get_all_keys(&ports, "%s.select.port", pfx);
+ any_ports_specified = src_ports.n || dst_ports.n || ports.n;
svec_append(&src_ports, &ports);
svec_append(&dst_ports, &ports);
svec_destroy(&ports);
prune_ports(m, &src_ports);
prune_ports(m, &dst_ports);
+ if (any_ports_specified && !src_ports.n && !dst_ports.n) {
+ VLOG_ERR("%s: none of the specified ports exist; "
+ "disabling port mirror %s", pfx, pfx);
+ mirror_destroy(m);
+ goto exit;
+ }
/* Get all the vlans, and drop duplicate and invalid vlans. */
svec_init(&vlan_strings);
}
/* Clean up. */
+exit:
svec_destroy(&src_ports);
svec_destroy(&dst_ports);
free(pfx);