-/* Copyright (c) 2008, 2009, 2010 Nicira Networks
+/* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
uint16_t dp_ifidx;
};
+/* A set of packet destinations.  Storage starts out in the inline 'builtin'
+ * array; dst_set_add() moves it to the heap once more than
+ * ARRAY_SIZE(builtin) entries accumulate. */
+struct dst_set {
+    struct dst builtin[32];
+    struct dst *dsts;
+    size_t n, allocated;
+};
+
+static void dst_set_init(struct dst_set *);
+static void dst_set_add(struct dst_set *, const struct dst *);
+static void dst_set_free(struct dst_set *);
+
struct iface {
/* These members are always valid. */
struct port *port; /* Containing port. */
tag_type iface_tag; /* Tag associated with iface_idx. */
};
+enum bond_mode {
+ BM_SLB, /* Source Load Balance (Default). */
+ BM_AB /* Active Backup. */
+};
+
#define MAX_MIRRORS 32
typedef uint32_t mirror_mask_t;
#define MIRROR_MASK_C(X) UINT32_C(X)
size_t n_ifaces, allocated_ifaces;
/* Bonding info. */
- struct bond_entry *bond_hash; /* An array of (BOND_MASK + 1) elements. */
+ enum bond_mode bond_mode; /* Type of the bond. BM_SLB is the default. */
int active_iface; /* Ifidx on which bcasts accepted, or -1. */
tag_type active_iface_tag; /* Tag for bcast flows. */
tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */
int updelay, downdelay; /* Delay before iface goes up/down, in ms. */
bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */
bool bond_fake_iface; /* Fake a bond interface for legacy compat? */
+ bool miimon; /* Use miimon instead of carrier? */
+ long long int bond_miimon_interval; /* Miimon status refresh interval. */
+ long long int bond_miimon_next_update; /* Time of next miimon update. */
long long int bond_next_fake_iface_update; /* Time of next update. */
+ struct netdev_monitor *monitor; /* Tracks carrier up/down status. */
+
+ /* SLB specific bonding info. */
+ struct bond_entry *bond_hash; /* An array of (BOND_MASK + 1) elements. */
int bond_rebalance_interval; /* Interval between rebalances, in ms. */
long long int bond_next_rebalance; /* Next rebalancing time. */
- struct netdev_monitor *monitor; /* Tracks carrier up/down status. */
/* Port mirroring info. */
mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */
bool is_mirror_output_port; /* Does port mirroring send frames here? */
};
-#define DP_MAX_PORTS 255
struct bridge {
struct list node; /* Node in global list of bridges. */
char *name; /* User-specified arbitrary name. */
static void shash_from_ovs_idl_map(char **keys, char **values, size_t n,
struct shash *);
+static void shash_to_ovs_idl_map(struct shash *,
+ char ***keys, char ***values, size_t *n);
+
/* Hooks into ofproto processing. */
static struct ofhooks bridge_ofhooks;
bond_init();
}
+/* Destroys every bridge on the global list and closes the OVSDB IDL
+ * connection, releasing the resources this module holds. */
+void
+bridge_exit(void)
+{
+    struct bridge *br, *next_br;
+
+    LIST_FOR_EACH_SAFE (br, next_br, node, &all_bridges) {
+        bridge_destroy(br);
+    }
+    ovsdb_idl_destroy(idl);
+}
+
/* Performs configuration that is only necessary once at ovs-vswitchd startup,
* but for which the ovs-vswitchd configuration 'cfg' is required. */
static void
svec_init(&dpif_types);
dp_enumerate_types(&dpif_types);
for (i = 0; i < dpif_types.n; i++) {
- struct dpif *dpif;
- int retval;
size_t j;
dp_enumerate_names(dpif_types.names[i], &dpif_names);
- /* For each dpif... */
+ /* Delete each dpif whose name is not in 'bridge_names'. */
for (j = 0; j < dpif_names.n; j++) {
- retval = dpif_open(dpif_names.names[j], dpif_types.names[i], &dpif);
- if (!retval) {
- struct svec all_names;
- size_t k;
-
- /* ...check whether any of its names is in 'bridge_names'. */
- svec_init(&all_names);
- dpif_get_all_names(dpif, &all_names);
- for (k = 0; k < all_names.n; k++) {
- if (svec_contains(&bridge_names, all_names.names[k])) {
- goto found;
- }
+ if (!svec_contains(&bridge_names, dpif_names.names[j])) {
+ struct dpif *dpif;
+ int retval;
+
+ retval = dpif_open(dpif_names.names[j], dpif_types.names[i],
+ &dpif);
+ if (!retval) {
+ dpif_delete(dpif);
+ dpif_close(dpif);
}
-
- /* No. Delete the dpif. */
- dpif_delete(dpif);
-
- found:
- svec_destroy(&all_names);
- dpif_close(dpif);
}
}
}
svec_destroy(&dpif_types);
}
-/* Initializes 'options' and fills it with the options for 'if_cfg'. Merges
- * keys from "options" and "other_config", preferring "options" keys over
- * "other_config" keys. */
-static void
-iface_get_options(const struct ovsrec_interface *if_cfg, struct shash *options)
-{
- size_t i;
-
- shash_from_ovs_idl_map(if_cfg->key_options, if_cfg->value_options,
- if_cfg->n_options, options);
-
- for (i = 0; i < if_cfg->n_other_config; i++) {
- char *key = if_cfg->key_other_config[i];
- char *value = if_cfg->value_other_config[i];
-
- if (!shash_find_data(options, key)) {
- shash_add(options, key, value);
- } else {
- VLOG_WARN("%s: ignoring \"other_config\" key %s that conflicts "
- "with \"options\" key %s", if_cfg->name, key, key);
- }
- }
-}
-
/* Callback for iterate_and_prune_ifaces(). */
static bool
check_iface(struct bridge *br, struct iface *iface, void *aux OVS_UNUSED)
* that port already belongs to a different datapath, so we must do all
* port deletions before any port additions. */
LIST_FOR_EACH (br, node, &all_bridges) {
- struct odp_port *dpif_ports;
- size_t n_dpif_ports;
+ struct dpif_port_dump dump;
struct shash want_ifaces;
+ struct dpif_port dpif_port;
- dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
bridge_get_all_ifaces(br, &want_ifaces);
- for (i = 0; i < n_dpif_ports; i++) {
- const struct odp_port *p = &dpif_ports[i];
- if (!shash_find(&want_ifaces, p->devname)
- && strcmp(p->devname, br->name)) {
- int retval = dpif_port_del(br->dpif, p->port);
+ DPIF_PORT_FOR_EACH (&dpif_port, &dump, br->dpif) {
+ if (!shash_find(&want_ifaces, dpif_port.name)
+ && strcmp(dpif_port.name, br->name)) {
+ int retval = dpif_port_del(br->dpif, dpif_port.port_no);
if (retval) {
VLOG_ERR("failed to remove %s interface from %s: %s",
- p->devname, dpif_name(br->dpif),
+ dpif_port.name, dpif_name(br->dpif),
strerror(retval));
}
}
}
shash_destroy(&want_ifaces);
- free(dpif_ports);
}
LIST_FOR_EACH (br, node, &all_bridges) {
- struct odp_port *dpif_ports;
- size_t n_dpif_ports;
struct shash cur_ifaces, want_ifaces;
+ struct dpif_port_dump dump;
+ struct dpif_port dpif_port;
/* Get the set of interfaces currently in this datapath. */
- dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
shash_init(&cur_ifaces);
- for (i = 0; i < n_dpif_ports; i++) {
- const char *name = dpif_ports[i].devname;
- shash_add_once(&cur_ifaces, name, &dpif_ports[i]);
+ DPIF_PORT_FOR_EACH (&dpif_port, &dump, br->dpif) {
+ struct dpif_port *port_info = xmalloc(sizeof *port_info);
+ dpif_port_clone(port_info, &dpif_port);
+ shash_add(&cur_ifaces, dpif_port.name, port_info);
}
/* Get the set of interfaces we want on this datapath. */
SHASH_FOR_EACH (node, &want_ifaces) {
const char *if_name = node->name;
struct iface *iface = node->data;
- struct odp_port *dpif_port = shash_find_data(&cur_ifaces, if_name);
- const char *type = iface ? iface->type : "internal";
+ struct dpif_port *dpif_port;
+ const char *type;
int error;
+ type = iface ? iface->type : "internal";
+ dpif_port = shash_find_data(&cur_ifaces, if_name);
+
/* If we have a port or a netdev already, and it's not the type we
* want, then delete the port (if any) and close the netdev (if
* any). */
|| (iface && iface->netdev
&& strcmp(type, netdev_get_type(iface->netdev)))) {
if (dpif_port) {
- error = ofproto_port_del(br->ofproto, dpif_port->port);
+ error = ofproto_port_del(br->ofproto, dpif_port->port_no);
if (error) {
continue;
}
shash_init(&args);
if (iface) {
- iface_get_options(iface->cfg, &args);
+ shash_from_ovs_idl_map(iface->cfg->key_options,
+ iface->cfg->value_options,
+ iface->cfg->n_options, &args);
}
error = netdev_open(&options, &netdev);
shash_destroy(&args);
struct shash args;
shash_init(&args);
- iface_get_options(iface->cfg, &args);
- netdev_reconfigure(iface->netdev, &args);
+ shash_from_ovs_idl_map(iface->cfg->key_options,
+ iface->cfg->value_options,
+ iface->cfg->n_options, &args);
+ netdev_set_config(iface->netdev, &args);
shash_destroy(&args);
}
}
- free(dpif_ports);
- shash_destroy(&cur_ifaces);
shash_destroy(&want_ifaces);
+
+ SHASH_FOR_EACH (node, &cur_ifaces) {
+ struct dpif_port *port_info = node->data;
+ dpif_port_destroy(port_info);
+ free(port_info);
+ }
+ shash_destroy(&cur_ifaces);
}
sflow_bridge_number = 0;
LIST_FOR_EACH (br, node, &all_bridges) {
return eth_addr_to_uint64(hash);
}
+/* Refreshes the database columns that mirror 'iface''s netdev state:
+ * "status", "admin_state", "duplex", "link_speed", "link_state", and "mtu".
+ * Any value that cannot be obtained from the netdev clears its column. */
+static void
+iface_refresh_status(struct iface *iface)
+{
+    struct shash sh;
+
+    enum netdev_flags flags;
+    uint32_t current;
+    int64_t bps;
+    int mtu;
+    int64_t mtu_64;
+    int error;
+
+    shash_init(&sh);
+
+    if (!netdev_get_status(iface->netdev, &sh)) {
+        size_t n;
+        char **keys, **values;
+
+        shash_to_ovs_idl_map(&sh, &keys, &values, &n);
+        ovsrec_interface_set_status(iface->cfg, keys, values, n);
+
+        /* The strings are owned by 'sh'; only the arrays are ours. */
+        free(keys);
+        free(values);
+    } else {
+        ovsrec_interface_set_status(iface->cfg, NULL, NULL, 0);
+    }
+
+    shash_destroy_free_data(&sh);
+
+    error = netdev_get_flags(iface->netdev, &flags);
+    if (!error) {
+        ovsrec_interface_set_admin_state(iface->cfg,
+                                         flags & NETDEV_UP ? "up" : "down");
+    } else {
+        ovsrec_interface_set_admin_state(iface->cfg, NULL);
+    }
+
+    error = netdev_get_features(iface->netdev, &current, NULL, NULL, NULL);
+    if (!error) {
+        ovsrec_interface_set_duplex(iface->cfg,
+                                    netdev_features_is_full_duplex(current)
+                                    ? "full" : "half");
+        /* netdev reports bps as uint64_t but the column is int64_t; real
+         * link speeds fit comfortably in either. */
+        bps = netdev_features_to_bps(current);
+        ovsrec_interface_set_link_speed(iface->cfg, &bps, 1);
+    } else {
+        ovsrec_interface_set_duplex(iface->cfg, NULL);
+        ovsrec_interface_set_link_speed(iface->cfg, NULL, 0);
+    }
+
+    ovsrec_interface_set_link_state(iface->cfg,
+                                    netdev_get_carrier(iface->netdev)
+                                    ? "up" : "down");
+
+    error = netdev_get_mtu(iface->netdev, &mtu);
+    if (!error) {
+        mtu_64 = mtu;
+        ovsrec_interface_set_mtu(iface->cfg, &mtu_64, 1);
+    } else {
+        ovsrec_interface_set_mtu(iface->cfg, NULL, 0);
+    }
+}
+
static void
iface_refresh_cfm_stats(struct iface *iface)
{
&datum);
}
+/* Returns a human-readable name for OpenFlow controller role 'role'. */
+static inline const char *
+nx_role_to_str(enum nx_role role)
+{
+    if (role == NX_ROLE_OTHER) {
+        return "other";
+    } else if (role == NX_ROLE_MASTER) {
+        return "master";
+    } else if (role == NX_ROLE_SLAVE) {
+        return "slave";
+    } else {
+        return "*** INVALID ROLE ***";
+    }
+}
+
+/* Pushes the live controller connection state of 'br' into the database:
+ * sets each Controller record's "is_connected", "role", and "status"
+ * columns, clearing them for controllers ofproto does not know about.
+ *
+ * NOTE(review): OVSREC_CONTROLLER_FOR_EACH walks every Controller row in
+ * the database, not just those attached to 'br' — presumably each target
+ * string is unique across bridges; verify against the schema. */
+static void
+bridge_refresh_controller_status(const struct bridge *br)
+{
+    struct shash info;
+    const struct ovsrec_controller *cfg;
+
+    ofproto_get_ofproto_controller_info(br->ofproto, &info);
+
+    OVSREC_CONTROLLER_FOR_EACH(cfg, idl) {
+        struct ofproto_controller_info *cinfo =
+            shash_find_data(&info, cfg->target);
+
+        if (cinfo) {
+            ovsrec_controller_set_is_connected(cfg, cinfo->is_connected);
+            ovsrec_controller_set_role(cfg, nx_role_to_str(cinfo->role));
+            ovsrec_controller_set_status(cfg, (char **) cinfo->pairs.keys,
+                                         (char **) cinfo->pairs.values,
+                                         cinfo->pairs.n);
+        } else {
+            ovsrec_controller_set_is_connected(cfg, false);
+            ovsrec_controller_set_role(cfg, NULL);
+            ovsrec_controller_set_status(cfg, NULL, NULL, 0);
+        }
+    }
+
+    ofproto_free_ofproto_controller_info(&info);
+}
+
void
bridge_run(void)
{
/* (Re)configure if necessary. */
database_changed = ovsdb_idl_run(idl);
cfg = ovsrec_open_vswitch_first(idl);
+#ifdef HAVE_OPENSSL
+ /* Re-configure SSL. We do this on every trip through the main loop,
+ * instead of just when the database changes, because the contents of the
+ * key and certificate files can change without the database changing.
+ *
+ * We do this before bridge_reconfigure() because that function might
+ * initiate SSL connections and thus requires SSL to be configured. */
+ if (cfg && cfg->ssl) {
+ const struct ovsrec_ssl *ssl = cfg->ssl;
+
+ stream_ssl_set_key_and_cert(ssl->private_key, ssl->certificate);
+ stream_ssl_set_ca_cert_file(ssl->ca_cert, ssl->bootstrap_ca_cert);
+ }
+#endif
if (database_changed || datapath_destroyed) {
if (cfg) {
struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl);
}
}
-#ifdef HAVE_OPENSSL
- /* Re-configure SSL. We do this on every trip through the main loop,
- * instead of just when the database changes, because the contents of the
- * key and certificate files can change without the database changing. */
- if (cfg && cfg->ssl) {
- const struct ovsrec_ssl *ssl = cfg->ssl;
-
- stream_ssl_set_key_and_cert(ssl->private_key, ssl->certificate);
- stream_ssl_set_ca_cert_file(ssl->ca_cert, ssl->bootstrap_ca_cert);
- }
-#endif
-
/* Refresh system and interface stats if necessary. */
if (time_msec() >= stats_timer) {
if (cfg) {
struct iface *iface = port->ifaces[j];
iface_refresh_stats(iface);
iface_refresh_cfm_stats(iface);
+ iface_refresh_status(iface);
}
}
+ bridge_refresh_controller_status(br);
}
refresh_system_stats(cfg);
ovsdb_idl_txn_commit(txn);
static void
bridge_fetch_dp_ifaces(struct bridge *br)
{
- struct odp_port *dpif_ports;
- size_t n_dpif_ports;
+ struct dpif_port_dump dump;
+ struct dpif_port dpif_port;
size_t i, j;
/* Reset all interface numbers. */
}
hmap_clear(&br->ifaces);
- dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
- for (i = 0; i < n_dpif_ports; i++) {
- struct odp_port *p = &dpif_ports[i];
- struct iface *iface = iface_lookup(br, p->devname);
+ DPIF_PORT_FOR_EACH (&dpif_port, &dump, br->dpif) {
+ struct iface *iface = iface_lookup(br, dpif_port.name);
if (iface) {
if (iface->dp_ifidx >= 0) {
VLOG_WARN("%s reported interface %s twice",
- dpif_name(br->dpif), p->devname);
- } else if (iface_from_dp_ifidx(br, p->port)) {
+ dpif_name(br->dpif), dpif_port.name);
+ } else if (iface_from_dp_ifidx(br, dpif_port.port_no)) {
VLOG_WARN("%s reported interface %"PRIu16" twice",
- dpif_name(br->dpif), p->port);
+ dpif_name(br->dpif), dpif_port.port_no);
} else {
- iface->dp_ifidx = p->port;
+ iface->dp_ifidx = dpif_port.port_no;
hmap_insert(&br->ifaces, &iface->dp_ifidx_node,
hash_int(iface->dp_ifidx, 0));
}
: -1));
}
}
- free(dpif_ports);
}
\f
/* Bridge packet processing functions. */
lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN],
uint16_t vlan)
{
+ assert(port->bond_mode == BM_SLB);
return &port->bond_hash[bond_hash(mac, vlan)];
}
assert(port->n_ifaces);
if (port->n_ifaces == 1) {
iface = port->ifaces[0];
- } else {
+ } else if (port->bond_mode == BM_AB) {
+ if (port->active_iface < 0) {
+ *tags |= port->no_ifaces_tag;
+ return false;
+ }
+ iface = port->ifaces[port->active_iface];
+ } else if (port->bond_mode == BM_SLB){
struct bond_entry *e = lookup_bond_entry(port, dl_src, vlan);
if (e->iface_idx < 0 || e->iface_idx >= port->n_ifaces
|| !port->ifaces[e->iface_idx]->enabled) {
}
*tags |= e->iface_tag;
iface = port->ifaces[e->iface_idx];
+ } else {
+ NOT_REACHED();
}
*dp_ifidx = iface->dp_ifidx;
*tags |= iface->tag; /* Currently only used for bonding. */
/* Nothing to do. */
return;
}
- VLOG_INFO_RL(&rl, "interface %s: carrier %s",
- iface->name, carrier ? "detected" : "dropped");
+ VLOG_INFO_RL(&rl, "interface %s: link state %s",
+ iface->name, carrier ? "up" : "down");
if (carrier == iface->enabled) {
iface->delay_expires = LLONG_MAX;
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
if (port->n_ifaces >= 2) {
char *devname;
- /* Track carrier going up and down on interfaces. */
- while (!netdev_monitor_poll(port->monitor, &devname)) {
- struct iface *iface;
+ if (port->monitor) {
+ assert(!port->miimon);
- iface = port_lookup_iface(port, devname);
- if (iface) {
- bool carrier = netdev_get_carrier(iface->netdev);
+ /* Track carrier going up and down on interfaces. */
+ while (!netdev_monitor_poll(port->monitor, &devname)) {
+ struct iface *iface;
+
+ iface = port_lookup_iface(port, devname);
+ if (iface) {
+ bool up = netdev_get_carrier(iface->netdev);
+
+ bond_link_status_update(iface, up);
+ port_update_bond_compat(port);
+ }
+ free(devname);
+ }
+ } else {
+ assert(port->miimon);
- bond_link_status_update(iface, carrier);
- port_update_bond_compat(port);
+ if (time_msec() >= port->bond_miimon_next_update) {
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *iface = port->ifaces[j];
+ bool up = netdev_get_miimon(iface->netdev);
+
+ bond_link_status_update(iface, up);
+ port_update_bond_compat(port);
+ }
+ port->bond_miimon_next_update = time_msec() +
+ port->bond_miimon_interval;
}
- free(devname);
}
for (j = 0; j < port->n_ifaces; j++) {
if (port->n_ifaces < 2) {
continue;
}
- netdev_monitor_poll_wait(port->monitor);
+
+ if (port->monitor) {
+ netdev_monitor_poll_wait(port->monitor);
+ }
+
+ if (port->miimon) {
+ poll_timer_wait_until(port->bond_miimon_next_update);
+ }
+
for (j = 0; j < port->n_ifaces; j++) {
struct iface *iface = port->ifaces[j];
if (iface->delay_expires != LLONG_MAX) {
}
 static bool
-set_dst(struct dst *p, const struct flow *flow,
+set_dst(struct dst *dst, const struct flow *flow,
         const struct port *in_port, const struct port *out_port,
         tag_type *tags)
 {
+    /* An output port with an implicit VLAN emits untagged (OFP_VLAN_NONE);
+     * otherwise keep the input port's implicit VLAN, or the flow's own
+     * 802.1Q tag if it has one. */
-    p->vlan = (out_port->vlan >= 0 ? OFP_VLAN_NONE
+    dst->vlan = (out_port->vlan >= 0 ? OFP_VLAN_NONE
                : in_port->vlan >= 0 ? in_port->vlan
                : flow->vlan_tci == 0 ? OFP_VLAN_NONE
                : vlan_tci_to_vid(flow->vlan_tci));
-    return choose_output_iface(out_port, flow->dl_src, p->vlan,
-                               &p->dp_ifidx, tags);
+    return choose_output_iface(out_port, flow->dl_src, dst->vlan,
+                               &dst->dp_ifidx, tags);
 }
static void
* vlan, but in most cases there are at most two different vlan tags so that's
* possibly overkill.) */
static void
-partition_dsts(struct dst *dsts, size_t n_dsts, int vlan)
+partition_dsts(struct dst_set *set, int vlan)
{
- struct dst *first = dsts;
- struct dst *last = dsts + n_dsts;
+ struct dst *first = set->dsts;
+ struct dst *last = set->dsts + set->n;
while (first != last) {
/* Invariants:
return ffs(mask);
}
+/* Initializes 'set' as an empty destination set backed by its inline
+ * 'builtin' storage. */
+static void
+dst_set_init(struct dst_set *set)
+{
+    set->n = 0;
+    set->allocated = ARRAY_SIZE(set->builtin);
+    set->dsts = set->builtin;
+}
+
+/* Appends a copy of '*dst' to 'set', doubling the backing array (moving
+ * off the inline 'builtin' storage onto the heap) when it is full. */
+static void
+dst_set_add(struct dst_set *set, const struct dst *dst)
+{
+    if (set->n >= set->allocated) {
+        size_t new_allocated = 2 * set->allocated;
+        struct dst *new_dsts = xmalloc(new_allocated * sizeof *new_dsts);
+
+        memcpy(new_dsts, set->dsts, set->n * sizeof *new_dsts);
+        dst_set_free(set);
+
+        set->allocated = new_allocated;
+        set->dsts = new_dsts;
+    }
+    set->dsts[set->n++] = *dst;
+}
+
+/* Frees any heap memory owned by 'set'.  Nothing to do while 'set' is
+ * still using its inline 'builtin' storage. */
+static void
+dst_set_free(struct dst_set *set)
+{
+    struct dst *dsts = set->dsts;
+
+    if (dsts != set->builtin) {
+        free(dsts);
+    }
+}
+
static bool
-dst_is_duplicate(const struct dst *dsts, size_t n_dsts,
- const struct dst *test)
+dst_is_duplicate(const struct dst_set *set, const struct dst *test)
{
size_t i;
- for (i = 0; i < n_dsts; i++) {
- if (dsts[i].vlan == test->vlan && dsts[i].dp_ifidx == test->dp_ifidx) {
+ for (i = 0; i < set->n; i++) {
+ if (set->dsts[i].vlan == test->vlan
+ && set->dsts[i].dp_ifidx == test->dp_ifidx) {
return true;
}
}
return true;
}
-static size_t
+static void
compose_dsts(const struct bridge *br, const struct flow *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
- struct dst dsts[], tag_type *tags, uint16_t *nf_output_iface)
+ struct dst_set *set, tag_type *tags, uint16_t *nf_output_iface)
{
mirror_mask_t mirrors = in_port->src_mirrors;
+ struct dst dst;
int flow_vlan;
- struct dst *dst = dsts;
size_t i;
flow_vlan = vlan_tci_to_vid(flow->vlan_tci);
}
if (out_port == FLOOD_PORT) {
- /* XXX use ODP_FLOOD if no vlans or bonding. */
- /* XXX even better, define each VLAN as a datapath port group */
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port != in_port
&& port_is_floodable(port)
&& port_includes_vlan(port, vlan)
&& !port->is_mirror_output_port
- && set_dst(dst, flow, in_port, port, tags)) {
+ && set_dst(&dst, flow, in_port, port, tags)) {
mirrors |= port->dst_mirrors;
- dst++;
+ dst_set_add(set, &dst);
}
}
*nf_output_iface = NF_OUT_FLOOD;
- } else if (out_port && set_dst(dst, flow, in_port, out_port, tags)) {
- *nf_output_iface = dst->dp_ifidx;
+ } else if (out_port && set_dst(&dst, flow, in_port, out_port, tags)) {
+ dst_set_add(set, &dst);
+ *nf_output_iface = dst.dp_ifidx;
mirrors |= out_port->dst_mirrors;
- dst++;
}
while (mirrors) {
struct mirror *m = br->mirrors[mirror_mask_ffs(mirrors) - 1];
if (!m->n_vlans || vlan_is_mirrored(m, vlan)) {
if (m->out_port) {
- if (set_dst(dst, flow, in_port, m->out_port, tags)
- && !dst_is_duplicate(dsts, dst - dsts, dst)) {
- dst++;
+ if (set_dst(&dst, flow, in_port, m->out_port, tags)
+ && !dst_is_duplicate(set, &dst)) {
+ dst_set_add(set, &dst);
}
} else {
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port_includes_vlan(port, m->out_vlan)
- && set_dst(dst, flow, in_port, port, tags))
+ && set_dst(&dst, flow, in_port, port, tags))
{
-
if (port->vlan < 0) {
- dst->vlan = m->out_vlan;
+ dst.vlan = m->out_vlan;
}
- if (dst_is_duplicate(dsts, dst - dsts, dst)) {
+ if (dst_is_duplicate(set, &dst)) {
continue;
}
* tagging tags place. This is necessary because
* dst->vlan is the final vlan, after removing implicit
* tags. */
- if (port == in_port && dst->vlan == flow_vlan) {
+ if (port == in_port && dst.vlan == flow_vlan) {
/* Don't send out input port on same VLAN. */
continue;
}
- dst++;
+ dst_set_add(set, &dst);
}
}
}
mirrors &= mirrors - 1;
}
- partition_dsts(dsts, dst - dsts, flow_vlan);
- return dst - dsts;
+ partition_dsts(set, flow_vlan);
}
static void OVS_UNUSED
-print_dsts(const struct dst *dsts, size_t n)
+print_dsts(const struct dst_set *set)
{
- for (; n--; dsts++) {
- printf(">p%"PRIu16, dsts->dp_ifidx);
- if (dsts->vlan != OFP_VLAN_NONE) {
- printf("v%"PRIu16, dsts->vlan);
+ size_t i;
+
+ for (i = 0; i < set->n; i++) {
+ const struct dst *dst = &set->dsts[i];
+
+ printf(">p%"PRIu16, dst->dp_ifidx);
+ if (dst->vlan != OFP_VLAN_NONE) {
+ printf("v%"PRIu16, dst->vlan);
}
}
}
tag_type *tags, struct ofpbuf *actions,
uint16_t *nf_output_iface)
{
- struct dst dsts[DP_MAX_PORTS * (MAX_MIRRORS + 1)];
- size_t n_dsts;
- const struct dst *p;
+ struct dst_set set;
uint16_t cur_vlan;
+ size_t i;
- n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags,
- nf_output_iface);
+ dst_set_init(&set);
+ compose_dsts(br, flow, vlan, in_port, out_port, &set, tags,
+ nf_output_iface);
cur_vlan = vlan_tci_to_vid(flow->vlan_tci);
if (cur_vlan == 0) {
cur_vlan = OFP_VLAN_NONE;
}
- for (p = dsts; p < &dsts[n_dsts]; p++) {
- if (p->vlan != cur_vlan) {
- if (p->vlan == OFP_VLAN_NONE) {
- nl_msg_put_flag(actions, ODPAT_STRIP_VLAN);
+ for (i = 0; i < set.n; i++) {
+ const struct dst *dst = &set.dsts[i];
+ if (dst->vlan != cur_vlan) {
+ if (dst->vlan == OFP_VLAN_NONE) {
+ nl_msg_put_flag(actions, ODP_ACTION_ATTR_STRIP_VLAN);
} else {
ovs_be16 tci;
- tci = htons(p->vlan & VLAN_VID_MASK);
+ tci = htons(dst->vlan & VLAN_VID_MASK);
tci |= flow->vlan_tci & htons(VLAN_PCP_MASK);
- nl_msg_put_be16(actions, ODPAT_SET_DL_TCI, tci);
+ nl_msg_put_be16(actions, ODP_ACTION_ATTR_SET_DL_TCI, tci);
}
- cur_vlan = p->vlan;
+ cur_vlan = dst->vlan;
}
- nl_msg_put_u32(actions, ODPAT_OUTPUT, p->dp_ifidx);
+ nl_msg_put_u32(actions, ODP_ACTION_ATTR_OUTPUT, dst->dp_ifidx);
}
+ dst_set_free(&set);
}
/* Returns the effective vlan of a packet, taking into account both the
return;
}
NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
- if (nl_attr_type(a) == ODPAT_OUTPUT) {
+ if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT) {
struct port *out_port = port_from_dp_ifidx(br, nl_attr_get_u32(a));
- if (out_port && out_port->n_ifaces >= 2) {
+ if (out_port && out_port->n_ifaces >= 2 &&
+ out_port->bond_mode == BM_SLB) {
uint16_t vlan = (flow->vlan_tci
? vlan_tci_to_vid(flow->vlan_tci)
: OFP_VLAN_NONE);
now = time_msec();
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
- if (port->n_ifaces > 1 && now >= port->bond_next_rebalance) {
+ if (port->n_ifaces > 1 && port->bond_mode == BM_SLB
+ && now >= port->bond_next_rebalance) {
port->bond_next_rebalance = now + port->bond_rebalance_interval;
bond_rebalance_port(port);
}
size_t n_hashes;
};
+/* Returns the name used in the database for bond mode 'bm'. */
+static const char *
+bond_mode_to_string(enum bond_mode bm)
+{
+    switch (bm) {
+    case BM_SLB:
+        return "balance-slb";
+    case BM_AB:
+        return "active-backup";
+    }
+
+    NOT_REACHED();
+    return NULL;                /* Not reached; pacifies the compiler. */
+}
+
/* Sorts pointers to pointers to bond_entries in ascending order by the
* interface to which they are assigned, and within a single interface in
* ascending order of bytes transmitted. */
struct port *port = from->iface->port;
uint64_t delta = hash->tx_bytes;
+ assert(port->bond_mode == BM_SLB);
+
VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %td) "
"from %s to %s (now carrying %"PRIu64"kB and "
"%"PRIu64"kB load, respectively)",
static void
bond_rebalance_port(struct port *port)
{
- struct slave_balance bals[DP_MAX_PORTS];
+ struct slave_balance *bals;
size_t n_bals;
struct bond_entry *hashes[BOND_MASK + 1];
struct slave_balance *b, *from, *to;
struct bond_entry *e;
size_t i;
+ assert(port->bond_mode == BM_SLB);
+
/* Sets up 'bals' to describe each of the port's interfaces, sorted in
* descending order of tx_bytes, so that bals[0] represents the most
* heavily loaded slave and bals[n_bals - 1] represents the least heavily
* become contiguous in memory, and then we point each 'hashes' members of
* a slave_balance structure to the start of a contiguous group. */
n_bals = port->n_ifaces;
+ bals = xmalloc(n_bals * sizeof *bals);
for (b = bals; b < &bals[n_bals]; b++) {
b->iface = port->ifaces[b - bals];
b->tx_bytes = 0;
while (!bals[n_bals - 1].iface->enabled) {
n_bals--;
if (!n_bals) {
- return;
+ goto exit;
}
}
for (e = &port->bond_hash[0]; e <= &port->bond_hash[BOND_MASK]; e++) {
e->tx_bytes /= 2;
}
+
+exit:
+ free(bals);
}
static void
struct ds ds = DS_EMPTY_INITIALIZER;
const struct bridge *br;
- ds_put_cstr(&ds, "bridge\tbond\tslaves\n");
+ ds_put_cstr(&ds, "bridge\tbond\ttype\tslaves\n");
LIST_FOR_EACH (br, node, &all_bridges) {
size_t i;
if (port->n_ifaces > 1) {
size_t j;
- ds_put_format(&ds, "%s\t%s\t", br->name, port->name);
+ ds_put_format(&ds, "%s\t%s\t%s\t", br->name, port->name,
+ bond_mode_to_string(port->bond_mode));
for (j = 0; j < port->n_ifaces; j++) {
const struct iface *iface = port->ifaces[j];
if (j) {
return;
}
+ ds_put_format(&ds, "bond_mode: %s\n",
+ bond_mode_to_string(port->bond_mode));
+ ds_put_format(&ds, "bond-detect-mode: %s\n",
+ port->miimon ? "miimon" : "carrier");
+
+ if (port->miimon) {
+ ds_put_format(&ds, "bond-miimon-interval: %lld\n",
+ port->bond_miimon_interval);
+ }
+
ds_put_format(&ds, "updelay: %d ms\n", port->updelay);
ds_put_format(&ds, "downdelay: %d ms\n", port->downdelay);
- ds_put_format(&ds, "next rebalance: %lld ms\n",
- port->bond_next_rebalance - time_msec());
+
+ if (port->bond_mode == BM_SLB) {
+ ds_put_format(&ds, "next rebalance: %lld ms\n",
+ port->bond_next_rebalance - time_msec());
+ }
+
for (j = 0; j < port->n_ifaces; j++) {
const struct iface *iface = port->ifaces[j];
struct bond_entry *be;
iface->delay_expires - time_msec());
}
+ if (port->bond_mode != BM_SLB) {
+ continue;
+ }
+
/* Hashes. */
for (be = port->bond_hash; be <= &port->bond_hash[BOND_MASK]; be++) {
int hash = be - port->bond_hash;
return;
}
+ if (port->bond_mode != BM_SLB) {
+ unixctl_command_reply(conn, 501, "not an SLB bond");
+ return;
+ }
+
if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
hash = atoi(hash_s) & BOND_MASK;
} else {
static void
port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
{
+ const char *detect_mode;
struct shash new_ifaces;
- long long int next_rebalance;
+ long long int next_rebalance, miimon_next_update;
unsigned long *trunks;
int vlan;
size_t i;
port->bond_next_rebalance = next_rebalance;
}
+ detect_mode = get_port_other_config(cfg, "bond-detect-mode",
+ "carrier");
+
+ if (!strcmp(detect_mode, "carrier")) {
+ port->miimon = false;
+ } else if (!strcmp(detect_mode, "miimon")) {
+ port->miimon = true;
+ } else {
+ port->miimon = false;
+ VLOG_WARN("port %s: unsupported bond-detect-mode %s, defaulting to "
+ "carrier", port->name, detect_mode);
+ }
+
+ port->bond_miimon_interval = atoi(
+ get_port_other_config(cfg, "bond-miimon-interval", "200"));
+ if (port->bond_miimon_interval < 100) {
+ port->bond_miimon_interval = 100;
+ }
+ miimon_next_update = time_msec() + port->bond_miimon_interval;
+ if (port->bond_miimon_next_update > miimon_next_update) {
+ port->bond_miimon_next_update = miimon_next_update;
+ }
+
+ if (!port->cfg->bond_mode ||
+ !strcmp(port->cfg->bond_mode, bond_mode_to_string(BM_SLB))) {
+ port->bond_mode = BM_SLB;
+ } else if (!strcmp(port->cfg->bond_mode, bond_mode_to_string(BM_AB))) {
+ port->bond_mode = BM_AB;
+ } else {
+ port->bond_mode = BM_SLB;
+ VLOG_WARN("port %s: unknown bond_mode %s, defaulting to %s",
+ port->name, port->cfg->bond_mode,
+ bond_mode_to_string(port->bond_mode));
+ }
+
/* Add new interfaces and update 'cfg' member of existing ones. */
shash_init(&new_ifaces);
for (i = 0; i < cfg->n_interfaces; i++) {
free(port->bond_hash);
port->bond_hash = NULL;
port->bond_compat_is_stale = true;
- port->bond_fake_iface = false;
}
+
+ port->bond_fake_iface = false;
} else {
size_t i;
- if (!port->bond_hash) {
+ if (port->bond_mode == BM_SLB && !port->bond_hash) {
port->bond_hash = xcalloc(BOND_MASK + 1, sizeof *port->bond_hash);
for (i = 0; i <= BOND_MASK; i++) {
struct bond_entry *e = &port->bond_hash[i];
if (port->cfg->bond_fake_iface) {
port->bond_next_fake_iface_update = time_msec();
}
+ } else if (port->bond_mode != BM_SLB) {
+ free(port->bond_hash);
+ port->bond_hash = NULL;
}
port->bond_compat_is_stale = true;
port->bond_fake_iface = port->cfg->bond_fake_iface;
- port->monitor = netdev_monitor_create();
- for (i = 0; i < port->n_ifaces; i++) {
- netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
+ if (!port->miimon) {
+ port->monitor = netdev_monitor_create();
+ for (i = 0; i < port->n_ifaces; i++) {
+ netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
+ }
}
}
}
struct compat_bond bond;
size_t i;
- if (port->n_ifaces < 2) {
+ if (port->n_ifaces < 2 || port->bond_mode != BM_SLB) {
proc_net_compat_update_bond(port->name, NULL);
return;
}
bool del_active = port->active_iface == iface->port_ifidx;
struct iface *del;
+ if (port->monitor) {
+ netdev_monitor_remove(port->monitor, iface->netdev);
+ }
+
shash_find_and_delete_assert(&br->iface_by_name, iface->name);
if (iface->dp_ifidx >= 0) {
}
}
+/* Converts 'shash' into the parallel 'keys' and 'values' arrays that the
+ * OVS IDL expects, storing their common length in '*n'.
+ *
+ * The arrays themselves are heap-allocated and become the caller's to free,
+ * but the strings they point at are taken directly from 'shash' and share
+ * its ownership. */
+static void
+shash_to_ovs_idl_map(struct shash *shash,
+                     char ***keys, char ***values, size_t *n)
+{
+    struct shash_node *sn;
+    size_t count, i;
+    char **k, **v;
+
+    count = shash_count(shash);
+    k = xmalloc(count * sizeof *k);
+    v = xmalloc(count * sizeof *v);
+
+    i = 0;
+    SHASH_FOR_EACH(sn, shash) {
+        k[i] = sn->name;
+        v[i] = sn->data;
+        i++;
+    }
+
+    *keys = k;
+    *values = v;
+    *n = count;
+}
+
struct iface_delete_queues_cbdata {
struct netdev *netdev;
const struct ovsdb_datum *queues;