-/* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks
+/* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "ovsdb-data.h"
#include "packets.h"
#include "poll-loop.h"
-#include "proc-net-compat.h"
#include "process.h"
#include "sha1.h"
#include "shash.h"
tag_type active_iface_tag; /* Tag for bcast flows. */
tag_type no_ifaces_tag; /* Tag for flows when all ifaces disabled. */
int updelay, downdelay; /* Delay before iface goes up/down, in ms. */
- bool bond_compat_is_stale; /* Need to call port_update_bond_compat()? */
bool bond_fake_iface; /* Fake a bond interface for legacy compat? */
bool miimon; /* Use miimon instead of carrier? */
long long int bond_miimon_interval; /* Miimon status refresh interval. */
static void bridge_reconfigure_remotes(struct bridge *,
const struct sockaddr_in *managers,
size_t n_managers);
+static void bridge_reconfigure_remotes_late(struct bridge *);
static void bridge_get_all_ifaces(const struct bridge *, struct shash *ifaces);
static void bridge_fetch_dp_ifaces(struct bridge *);
static void bridge_flush(struct bridge *);
static uint64_t dpid_from_hash(const void *, size_t nbytes);
static unixctl_cb_func bridge_unixctl_fdb_show;
+static unixctl_cb_func qos_unixctl_show;
-static void lacp_run(struct bridge *);
-static void lacp_wait(struct bridge *);
+static void lacp_run(struct port *);
+static void lacp_wait(struct port *);
static void lacp_process_packet(const struct ofpbuf *, struct iface *);
static void bond_init(void);
-static void bond_run(struct bridge *);
-static void bond_wait(struct bridge *);
+static void bond_run(struct port *);
+static void bond_wait(struct port *);
static void bond_rebalance_port(struct port *);
static void bond_send_learning_packets(struct port *);
static void bond_enable_slave(struct iface *iface, bool enable);
+static void port_run(struct port *);
+static void port_wait(struct port *);
static struct port *port_create(struct bridge *, const char *name);
static void port_reconfigure(struct port *, const struct ovsrec_port *);
static void port_del_ifaces(struct port *, const struct ovsrec_port *);
static struct iface *port_lookup_iface(const struct port *, const char *name);
static struct port *port_from_dp_ifidx(const struct bridge *,
uint16_t dp_ifidx);
-static void port_update_bond_compat(struct port *);
-static void port_update_vlan_compat(struct port *);
static void port_update_bonding(struct port *);
static void port_update_lacp(struct port *);
const struct ovsrec_interface *if_cfg);
static void iface_destroy(struct iface *);
static struct iface *iface_lookup(const struct bridge *, const char *name);
+static struct iface *iface_find(const char *name);
static struct iface *iface_from_dp_ifidx(const struct bridge *,
uint16_t dp_ifidx);
static void iface_set_mac(struct iface *);
/* Register unixctl commands. */
unixctl_command_register("fdb/show", bridge_unixctl_fdb_show, NULL);
+ unixctl_command_register("qos/show", qos_unixctl_show, NULL);
unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows,
NULL);
unixctl_command_register("bridge/reconnect", bridge_unixctl_reconnect,
if (port->n_ifaces) {
i++;
} else {
- VLOG_ERR("%s port has no interfaces, dropping", port->name);
+ VLOG_WARN("%s port has no interfaces, dropping", port->name);
port_destroy(port);
}
}
struct shash targets;
size_t i;
- /* Collect all of the potential targets, as the union of the "managers"
- * column and the "targets" columns of the rows pointed to by
- * "manager_options", excluding any that are out-of-band. */
+ /* Collect all of the potential targets from the "targets" columns of the
+ * rows pointed to by "manager_options", excluding any that are
+ * out-of-band. */
shash_init(&targets);
- for (i = 0; i < ovs_cfg->n_managers; i++) {
- shash_add_once(&targets, ovs_cfg->managers[i], NULL);
- }
for (i = 0; i < ovs_cfg->n_manager_options; i++) {
struct ovsrec_manager *m = ovs_cfg->manager_options[i];
&& strcmp(dpif_port.name, br->name)) {
int retval = dpif_port_del(br->dpif, dpif_port.port_no);
if (retval) {
- VLOG_ERR("failed to remove %s interface from %s: %s",
- dpif_port.name, dpif_name(br->dpif),
- strerror(retval));
+ VLOG_WARN("failed to remove %s interface from %s: %s",
+ dpif_port.name, dpif_name(br->dpif),
+ strerror(retval));
}
}
}
dpif_name(br->dpif));
break;
} else {
- VLOG_ERR("failed to add %s interface to %s: %s",
- if_name, dpif_name(br->dpif),
- strerror(error));
+ VLOG_WARN("failed to add %s interface to %s: %s",
+ if_name, dpif_name(br->dpif),
+ strerror(error));
continue;
}
}
struct port *port = br->ports[i];
int j;
- port_update_vlan_compat(port);
port_update_bonding(port);
port_update_lacp(port);
HMAP_FOR_EACH (iface, dp_ifidx_node, &br->ifaces) {
iface_update_cfm(iface);
}
+ bridge_reconfigure_remotes_late(br);
}
free(managers);
/* Otherwise choose the minimum non-local MAC address among all of the
* interfaces. */
- memset(ea, 0xff, sizeof ea);
+ memset(ea, 0xff, ETH_ADDR_LEN);
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
uint8_t iface_ea[ETH_ADDR_LEN];
ovsrec_interface_set_statistics(iface->cfg, keys, values, n);
}
+/* Returns true if 'cfg' asks for system statistics to be gathered, that is,
+ * if its other-config column maps the key "enable-statistics" to exactly
+ * "true".  Any other value, or a missing key, yields false. */
+static bool
+enable_system_stats(const struct ovsrec_open_vswitch *cfg)
+{
+    /* The key consulted here is other-config:enable-statistics. */
+    const char *enable = get_ovsrec_key_value(
+        &cfg->header_, &ovsrec_open_vswitch_col_other_config,
+        "enable-statistics");
+
+    /* Disabled by default: an absent key is treated like "false". */
+    return enable && !strcmp(enable, "true");
+}
+
static void
refresh_system_stats(const struct ovsrec_open_vswitch *cfg)
{
struct shash stats;
shash_init(&stats);
- get_system_stats(&stats);
+ if (enable_system_stats(cfg)) {
+ get_system_stats(&stats);
+ }
ovsdb_datum_from_shash(&datum, &stats);
ovsdb_idl_txn_write(&cfg->header_, &ovsrec_open_vswitch_col_statistics,
bridge_wait(void)
{
struct bridge *br;
- struct iface *iface;
LIST_FOR_EACH (br, node, &all_bridges) {
+ size_t i;
+
ofproto_wait(br->ofproto);
if (ofproto_has_primary_controller(br->ofproto)) {
continue;
}
mac_learning_wait(br->ml);
- lacp_wait(br);
- bond_wait(br);
- HMAP_FOR_EACH (iface, dp_ifidx_node, &br->ifaces) {
- if (iface->cfm) {
- cfm_wait(iface->cfm);
- }
+ for (i = 0; i < br->n_ports; i++) {
+ port_wait(br->ports[i]);
}
}
ovsdb_idl_wait(idl);
}
ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n",
br->ports[e->port]->ifaces[0]->dp_ifidx,
- e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e));
+ e->vlan, ETH_ADDR_ARGS(e->mac),
+ mac_entry_age(br->ml, e));
}
unixctl_command_reply(conn, 200, ds_cstr(&ds));
ds_destroy(&ds);
}
\f
+/* QoS unixctl user interface functions. */
+
+/* Auxiliary data handed to qos_unixctl_show_cb() through its 'aux' pointer. */
+struct qos_unixctl_show_cbdata {
+    struct ds *ds;              /* String that the callback appends to. */
+    struct iface *iface;        /* Interface whose queue stats are fetched. */
+};
+
+/* Queue-dump callback used by qos_unixctl_show().  Appends a section for
+ * queue 'queue_id' to the string in 'aux' (a struct qos_unixctl_show_cbdata):
+ * a heading, every key-value pair in 'details', and then the queue's transmit
+ * counters, or an error line if the counters cannot be retrieved. */
+static void
+qos_unixctl_show_cb(unsigned int queue_id,
+                    const struct shash *details,
+                    void *aux)
+{
+    struct qos_unixctl_show_cbdata *cbdata = aux;
+    struct ds *out = cbdata->ds;
+    struct netdev_queue_stats qstats;
+    struct shash_node *detail;
+    int retval;
+
+    /* Heading: queue 0 is presented as the default queue. */
+    ds_put_cstr(out, "\n");
+    if (!queue_id) {
+        ds_put_cstr(out, "Default:\n");
+    } else {
+        ds_put_format(out, "Queue %u:\n", queue_id);
+    }
+
+    SHASH_FOR_EACH (detail, details) {
+        ds_put_format(out, "\t%s: %s\n",
+                      detail->name, (char *)detail->data);
+    }
+
+    retval = netdev_get_queue_stats(cbdata->iface->netdev, queue_id, &qstats);
+    if (retval) {
+        ds_put_format(out, "\tFailed to get statistics for queue %u: %s",
+                      queue_id, strerror(retval));
+        return;
+    }
+
+    /* A counter equal to UINT64_MAX is left out of the report (presumably
+     * the netdev layer's marker for "not available" -- TODO confirm). */
+    if (qstats.tx_packets != UINT64_MAX) {
+        ds_put_format(out, "\ttx_packets: %"PRIu64"\n", qstats.tx_packets);
+    }
+    if (qstats.tx_bytes != UINT64_MAX) {
+        ds_put_format(out, "\ttx_bytes: %"PRIu64"\n", qstats.tx_bytes);
+    }
+    if (qstats.tx_errors != UINT64_MAX) {
+        ds_put_format(out, "\ttx_errors: %"PRIu64"\n", qstats.tx_errors);
+    }
+}
+
+/* unixctl handler registered as "qos/show".  Looks up the interface named by
+ * 'args' across all bridges and replies on 'conn':
+ *
+ *   - 501 "no such interface" if the name is unknown,
+ *   - 501 with an error message if the QoS configuration cannot be read,
+ *   - 501 "QoS not configured on ..." if the interface has no QoS type,
+ *   - 200 with the QoS type, its details, and one section per queue
+ *     otherwise (a queue-dump failure is reported inside the 200 reply).
+ */
+static void
+qos_unixctl_show(struct unixctl_conn *conn,
+                 const char *args, void *aux OVS_UNUSED)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    struct shash sh = SHASH_INITIALIZER(&sh);
+    struct iface *iface;
+    const char *type = NULL;
+    struct shash_node *node;
+    struct qos_unixctl_show_cbdata data;
+    int error;
+
+    iface = iface_find(args);
+    if (!iface) {
+        unixctl_command_reply(conn, 501, "no such interface");
+        return;
+    }
+
+    /* Do not dereference 'type' unless the query succeeded: a failing
+     * netdev_get_qos() is not guaranteed to leave it pointing at a string. */
+    error = netdev_get_qos(iface->netdev, &type, &sh);
+    if (error) {
+        ds_put_format(&ds, "failed to get QoS configuration of %s: %s\n",
+                      iface->name, strerror(error));
+        unixctl_command_reply(conn, 501, ds_cstr(&ds));
+    } else if (type && *type != '\0') {
+        ds_put_format(&ds, "QoS: %s %s\n", iface->name, type);
+
+        SHASH_FOR_EACH (node, &sh) {
+            ds_put_format(&ds, "%s: %s\n", node->name, (char *)node->data);
+        }
+
+        data.ds = &ds;
+        data.iface = iface;
+        error = netdev_dump_queues(iface->netdev, qos_unixctl_show_cb, &data);
+
+        if (error) {
+            ds_put_format(&ds, "failed to dump queues: %s", strerror(error));
+        }
+        unixctl_command_reply(conn, 200, ds_cstr(&ds));
+    } else {
+        ds_put_format(&ds, "QoS not configured on %s\n", iface->name);
+        unixctl_command_reply(conn, 501, ds_cstr(&ds));
+    }
+
+    shash_destroy_free_data(&sh);
+    ds_destroy(&ds);
+}
+\f
/* Bridge reconfiguration functions. */
static struct bridge *
bridge_create(const struct ovsrec_bridge *br_cfg)
br->name = xstrdup(br_cfg->name);
br->cfg = br_cfg;
- br->ml = mac_learning_create();
+ br->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
eth_addr_nicira_random(br->default_ea);
hmap_init(&br->ifaces);
static int
bridge_run_one(struct bridge *br)
{
+ size_t i;
int error;
- struct iface *iface;
error = ofproto_run1(br->ofproto);
if (error) {
}
mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto));
- lacp_run(br);
- bond_run(br);
+
+ for (i = 0; i < br->n_ports; i++) {
+ port_run(br->ports[i]);
+ }
error = ofproto_run2(br->ofproto, br->flush);
br->flush = false;
- HMAP_FOR_EACH (iface, dp_ifidx_node, &br->ifaces) {
- struct ofpbuf *packet;
-
- if (!iface->cfm) {
- continue;
- }
-
- packet = cfm_run(iface->cfm);
- if (packet) {
- iface_send_packet(iface, packet);
- ofpbuf_uninit(packet);
- free(packet);
- }
- }
-
return error;
}
struct svec snoops, old_snoops;
struct shash_node *node;
enum ofproto_fail_mode fail_mode;
+ const char *idle_time_str;
+ int idle_time;
size_t i;
/* Collect old ports. */
}
ofproto_set_fail_mode(br->ofproto, fail_mode);
+ /* Set the MAC learning aging timeout. */
+ idle_time_str = bridge_get_other_config(br->cfg, "mac-aging-time");
+ idle_time = (idle_time_str && atoi(idle_time_str)
+ ? atoi(idle_time_str)
+ : MAC_ENTRY_DEFAULT_IDLE_TIME);
+ mac_learning_set_idle_time(br->ml, idle_time);
+
/* Delete all flows if we're switching from connected to standalone or vice
* versa. (XXX Should we delete all flows if we are switching from one
* controller to another?) */
oc->max_backoff = 0;
oc->probe_interval = 60;
oc->band = OFPROTO_OUT_OF_BAND;
- oc->accept_re = NULL;
- oc->update_resolv_conf = false;
oc->rate_limit = 0;
oc->burst_limit = 0;
}
oc->probe_interval = c->inactivity_probe ? *c->inactivity_probe / 1000 : 5;
oc->band = (!c->connection_mode || !strcmp(c->connection_mode, "in-band")
? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND);
- oc->accept_re = c->discover_accept_regex;
- oc->update_resolv_conf = c->discover_update_resolv_conf;
oc->rate_limit = c->controller_rate_limit ? *c->controller_rate_limit : 0;
oc->burst_limit = (c->controller_burst_limit
? *c->controller_burst_limit : 0);
struct iface *local_iface;
struct in_addr ip;
- /* Controller discovery does its own TCP/IP configuration later. */
- if (strcmp(c->target, "discover")) {
- return;
- }
-
/* If there's no local interface or no IP address, give up. */
local_iface = bridge_get_local_iface(br);
if (!local_iface || !c->local_ip || !inet_aton(c->local_ip, &ip)) {
if (had_primary != ofproto_has_primary_controller(br->ofproto)) {
ofproto_flush_flows(br->ofproto);
}
+}
+/* Does configuration of remotes that must happen after all of the ports and
+ * interfaces are fully configured, that is, when flow translation can be
+ * expected to succeed. (This is because ofproto_add_flow() immediately
+ * re-translates any existing facets for the rule that it replaces, if any.)
+ * In particular, it must be called after port_update_bonding(), to ensure that
+ * 'bond_hash' is non-NULL for bonded ports. */
+static void
+bridge_reconfigure_remotes_late(struct bridge *br)
+{
/* If there are no controllers and the bridge is in standalone
* mode, set up a flow that matches every packet and directs
* them to OFPP_NORMAL (which goes to us). Otherwise, the
* switch is in secure mode and we won't pass any traffic until
* a controller has been defined and it tells us to do so. */
- if (!n_controllers
+ if (!bridge_get_controllers(br, NULL)
&& ofproto_get_fail_mode(br->ofproto) == OFPROTO_FAIL_STANDALONE) {
union ofp_action action;
struct cls_rule rule;
return false;
}
e->iface_tag = tag_create_random();
- ((struct port *) port)->bond_compat_is_stale = true;
}
*tags |= e->iface_tag;
iface = port->ifaces[e->iface_idx];
}
moving_active_iface = false;
- port->bond_compat_is_stale = true;
}
/* Attempts to make the sum of the bond slaves' statistics appear on the fake
iface->up = carrier;
iface->lacp_tx = 0;
- iface->port->bond_compat_is_stale = true;
}
static void
-bond_run(struct bridge *br)
+bond_run(struct port *port)
{
- size_t i, j;
-
- for (i = 0; i < br->n_ports; i++) {
- struct port *port = br->ports[i];
-
- if (port->n_ifaces >= 2) {
- char *devname;
+ size_t i;
+ char *devname;
- if (port->monitor) {
- assert(!port->miimon);
+ if (port->n_ifaces < 2) {
+ return;
+ }
- /* Track carrier going up and down on interfaces. */
- while (!netdev_monitor_poll(port->monitor, &devname)) {
- struct iface *iface;
+ if (port->monitor) {
+ assert(!port->miimon);
- iface = port_lookup_iface(port, devname);
- if (iface) {
- bool up = netdev_get_carrier(iface->netdev);
- bond_link_carrier_update(iface, up);
- }
- free(devname);
- }
- } else {
- assert(port->miimon);
+ /* Track carrier going up and down on interfaces. */
+ while (!netdev_monitor_poll(port->monitor, &devname)) {
+ struct iface *iface;
- if (time_msec() >= port->bond_miimon_next_update) {
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- bool up = netdev_get_miimon(iface->netdev);
- bond_link_carrier_update(iface, up);
- }
- port->bond_miimon_next_update = time_msec() +
- port->bond_miimon_interval;
- }
+ iface = port_lookup_iface(port, devname);
+ if (iface) {
+ bool up = netdev_get_carrier(iface->netdev);
+ bond_link_carrier_update(iface, up);
}
+ free(devname);
+ }
+ } else {
+ assert(port->miimon);
- for (j = 0; j < port->n_ifaces; j++) {
- bond_link_status_update(port->ifaces[j]);
+ if (time_msec() >= port->bond_miimon_next_update) {
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
+ bool up = netdev_get_miimon(iface->netdev);
+ bond_link_carrier_update(iface, up);
}
+ port->bond_miimon_next_update = time_msec() +
+ port->bond_miimon_interval;
+ }
+ }
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- if (time_msec() >= iface->delay_expires) {
- bond_enable_slave(iface, !iface->enabled);
- }
- }
+ for (i = 0; i < port->n_ifaces; i++) {
+ bond_link_status_update(port->ifaces[i]);
+ }
- if (port->bond_fake_iface
- && time_msec() >= port->bond_next_fake_iface_update) {
- bond_update_fake_iface_stats(port);
- port->bond_next_fake_iface_update = time_msec() + 1000;
- }
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
+ if (time_msec() >= iface->delay_expires) {
+ bond_enable_slave(iface, !iface->enabled);
}
+ }
- if (port->bond_compat_is_stale) {
- port->bond_compat_is_stale = false;
- port_update_bond_compat(port);
- }
+ if (port->bond_fake_iface
+ && time_msec() >= port->bond_next_fake_iface_update) {
+ bond_update_fake_iface_stats(port);
+ port->bond_next_fake_iface_update = time_msec() + 1000;
}
}
static void
-bond_wait(struct bridge *br)
+bond_wait(struct port *port)
{
- size_t i, j;
+ size_t i;
- for (i = 0; i < br->n_ports; i++) {
- struct port *port = br->ports[i];
- if (port->n_ifaces < 2) {
- continue;
- }
+ if (port->n_ifaces < 2) {
+ return;
+ }
- if (port->monitor) {
- netdev_monitor_poll_wait(port->monitor);
- }
+ if (port->monitor) {
+ netdev_monitor_poll_wait(port->monitor);
+ }
- if (port->miimon) {
- poll_timer_wait_until(port->bond_miimon_next_update);
- }
+ if (port->miimon) {
+ poll_timer_wait_until(port->bond_miimon_next_update);
+ }
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- if (iface->delay_expires != LLONG_MAX) {
- poll_timer_wait_until(iface->delay_expires);
- }
- }
- if (port->bond_fake_iface) {
- poll_timer_wait_until(port->bond_next_fake_iface_update);
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
+ if (iface->delay_expires != LLONG_MAX) {
+ poll_timer_wait_until(iface->delay_expires);
}
}
+
+ if (port->bond_fake_iface) {
+ poll_timer_wait_until(port->bond_next_fake_iface_update);
+ }
}
static bool
return true;
}
+/* Decides whether a packet addressed to Ethernet destination 'dst' may be
+ * mirrored onto a VLAN.  Most packets may; the exceptions are protocols that
+ * could confuse switches.  Returns true if mirroring is allowed, false if the
+ * packet must not be mirrored. */
+static bool
+eth_dst_may_rspan(const uint8_t dst[ETH_ADDR_LEN])
+{
+    /* If you change this function's behavior, please update corresponding
+     * documentation in vswitch.xml at the same time. */
+
+    /* All of the banned MACs happen to start with 01 at present, so a single
+     * byte comparison quickly lets most destinations through. */
+    if (dst[0] != 0x01) {
+        return true;
+    }
+
+    /* Drop STP, IEEE pause frames, and other reserved protocols
+     * (01-80-c2-00-00-0x). */
+    if (eth_addr_is_reserved(dst)) {
+        return false;
+    }
+
+    /* Everything below concerns the Cisco OUI prefix 01-00-0c; dst[0] is
+     * already known to be 0x01 at this point. */
+    if (dst[1] != 0x00 || dst[2] != 0x0c) {
+        return true;
+    }
+
+    /* Drop the following protocols plus others following the same pattern:
+     *
+     *     CDP, VTP, DTP, PAgP  (01-00-0c-cc-cc-cc)
+     *     Spanning Tree PVSTP+ (01-00-0c-cc-cc-cd)
+     *     STP Uplink Fast      (01-00-0c-cd-cd-cd) */
+    if ((dst[3] & 0xfe) == 0xcc
+        && (dst[4] & 0xfe) == 0xcc
+        && (dst[5] & 0xfe) == 0xcc) {
+        return false;
+    }
+
+    /* Drop Inter Switch Link packets (01-00-0c-00-00-00); anything else with
+     * the Cisco prefix is allowed. */
+    return (dst[3] | dst[4] | dst[5]) != 0;
+}
+
static void
compose_dsts(const struct bridge *br, const struct flow *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
&& !dst_is_duplicate(set, &dst)) {
dst_set_add(set, &dst);
}
- } else {
+ } else if (eth_dst_may_rspan(flow->dl_dst)) {
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port_includes_vlan(port, m->out_vlan)
* to the exception is if we locked the learning table to avoid
* reflections on bond slaves. If this is the case, just drop the
* packet now. */
- src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan,
- &is_grat_arp_locked);
- if (src_idx != -1 && src_idx != in_port->port_idx &&
- (!is_gratuitous_arp(flow) || is_grat_arp_locked)) {
+ if (in_port->bond_mode != BM_AB) {
+ src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan,
+ &is_grat_arp_locked);
+ if (src_idx != -1 && src_idx != in_port->port_idx &&
+ (!is_gratuitous_arp(flow) || is_grat_arp_locked)) {
return false;
+ }
+ }
+ }
+
+    /* Drop all packets which arrive on backup slaves.  This is similar to how
+     * Linux bonding handles active-backup bonds. */
+    if (in_port->bond_mode == BM_AB) {
+        /* Accumulate the bond's active-interface *tag* here.  'active_iface'
+         * is an integer slave index (compared against 'port_ifidx' below),
+         * not a tag, so ORing it into '*tags' would pollute the tag set. */
+        *tags |= in_port->active_iface_tag;
+        if (in_port->active_iface != in_iface->port_ifidx) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+            VLOG_WARN_RL(&rl, "active-backup bond received packet on backup"
+                         " interface (%s) destined for " ETH_ADDR_FMT,
+                         in_iface->name, ETH_ADDR_ARGS(flow->dl_dst));
+            return false;
         }
     }
bridge_account_flow_ofhook_cb(const struct flow *flow, tag_type tags,
const struct nlattr *actions,
size_t actions_len,
- unsigned long long int n_bytes, void *br_)
+ uint64_t n_bytes, void *br_)
{
struct bridge *br = br_;
const struct nlattr *a;
if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT) {
struct port *out_port = port_from_dp_ifidx(br, nl_attr_get_u32(a));
if (out_port && out_port->n_ifaces >= 2 &&
- out_port->bond_mode == BM_SLB) {
+ out_port->bond_mode != BM_AB) {
uint16_t vlan = (flow->vlan_tci
? vlan_tci_to_vid(flow->vlan_tci)
: OFP_VLAN_NONE);
now = time_msec();
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
- if (port->n_ifaces > 1 && port->bond_mode == BM_SLB
+ if (port->n_ifaces > 1 && port->bond_mode != BM_AB
&& now >= port->bond_next_rebalance) {
port->bond_next_rebalance = now + port->bond_rebalance_interval;
bond_rebalance_port(port);
}
iface->lacp_status |= LACP_CURRENT;
- iface->lacp_status &= ~LACP_EXPIRED;
+ iface->lacp_status &= ~(LACP_EXPIRED | LACP_DEFAULTED);
iface->lacp_rx = time_msec() + LACP_SLOW_TIME_RX;
iface->lacp_actor.state = iface_get_lacp_state(iface);
}
static void
-lacp_run(struct bridge *br)
+lacp_run(struct port *port)
{
- size_t i, j;
+ size_t i;
struct ofpbuf packet;
- ofpbuf_init(&packet, ETH_HEADER_LEN + LACP_PDU_LEN);
-
- for (i = 0; i < br->n_ports; i++) {
- struct port *port = br->ports[i];
+ if (!port->lacp) {
+ return;
+ }
- if (!port->lacp) {
- continue;
- }
+ ofpbuf_init(&packet, ETH_HEADER_LEN + LACP_PDU_LEN);
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
- if (time_msec() > iface->lacp_rx) {
- if (iface->lacp_status & LACP_CURRENT) {
- iface_set_lacp_expired(iface);
- } else if (iface->lacp_status & LACP_EXPIRED) {
- iface_set_lacp_defaulted(iface);
- }
+ if (time_msec() > iface->lacp_rx) {
+ if (iface->lacp_status & LACP_CURRENT) {
+ iface_set_lacp_expired(iface);
+ } else if (iface->lacp_status & LACP_EXPIRED) {
+ iface_set_lacp_defaulted(iface);
}
}
+ }
- if (port->lacp_need_update) {
- lacp_update_ifaces(port);
- }
-
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- uint8_t ea[ETH_ADDR_LEN];
- int error;
+ if (port->lacp_need_update) {
+ lacp_update_ifaces(port);
+ }
- if (time_msec() < iface->lacp_tx || !lacp_iface_may_tx(iface)) {
- continue;
- }
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
+ uint8_t ea[ETH_ADDR_LEN];
+ int error;
- error = netdev_get_etheraddr(iface->netdev, ea);
- if (!error) {
- iface->lacp_actor.state = iface_get_lacp_state(iface);
- compose_lacp_packet(&packet, &iface->lacp_actor,
- &iface->lacp_partner, ea);
- iface_send_packet(iface, &packet);
- } else {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
- VLOG_ERR_RL(&rl, "iface %s: failed to obtain Ethernet address "
- "(%s)", iface->name, strerror(error));
- }
+ if (time_msec() < iface->lacp_tx || !lacp_iface_may_tx(iface)) {
+ continue;
+ }
- iface->lacp_tx = time_msec() +
- (iface->lacp_partner.state & LACP_STATE_TIME
- ? LACP_FAST_TIME_TX
- : LACP_SLOW_TIME_TX);
+ error = netdev_get_etheraddr(iface->netdev, ea);
+ if (!error) {
+ iface->lacp_actor.state = iface_get_lacp_state(iface);
+ compose_lacp_packet(&packet, &iface->lacp_actor,
+ &iface->lacp_partner, ea);
+ iface_send_packet(iface, &packet);
+ } else {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
+ VLOG_ERR_RL(&rl, "iface %s: failed to obtain Ethernet address "
+ "(%s)", iface->name, strerror(error));
}
+
+ iface->lacp_tx = time_msec() +
+ (iface->lacp_partner.state & LACP_STATE_TIME
+ ? LACP_FAST_TIME_TX
+ : LACP_SLOW_TIME_TX);
}
ofpbuf_uninit(&packet);
}
static void
-lacp_wait(struct bridge *br)
+lacp_wait(struct port *port)
{
- size_t i, j;
-
- for (i = 0; i < br->n_ports; i++) {
- struct port *port = br->ports[i];
+ size_t i;
- if (!port->lacp) {
- continue;
- }
+ if (!port->lacp) {
+ return;
+ }
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
+ for (i = 0; i < port->n_ifaces; i++) {
+ struct iface *iface = port->ifaces[i];
- if (lacp_iface_may_tx(iface)) {
- poll_timer_wait_until(iface->lacp_tx);
- }
+ if (lacp_iface_may_tx(iface)) {
+ poll_timer_wait_until(iface->lacp_tx);
+ }
- if (iface->lacp_status & (LACP_CURRENT | LACP_EXPIRED)) {
- poll_timer_wait_until(iface->lacp_rx);
- }
+ if (iface->lacp_status & (LACP_CURRENT | LACP_EXPIRED)) {
+ poll_timer_wait_until(iface->lacp_rx);
}
}
}
struct port *port = from->iface->port;
uint64_t delta = hash->tx_bytes;
- assert(port->bond_mode == BM_SLB);
+ assert(port->bond_mode != BM_AB);
VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %td) "
"from %s to %s (now carrying %"PRIu64"kB and "
}
if (i < from->n_hashes) {
bond_shift_load(from, to, i);
- port->bond_compat_is_stale = true;
/* If the result of the migration changed the relative order of
* 'from' and 'to' swap them back to maintain invariants. */
ofproto_revalidate(port->bridge->ofproto, entry->iface_tag);
entry->iface_idx = iface->port_ifidx;
entry->iface_tag = tag_create_random();
- port->bond_compat_is_stale = true;
unixctl_command_reply(conn, 200, "migrated");
}
\f
/* Port functions. */
+/* Performs periodic processing for 'port': runs its LACP and bonding logic,
+ * then, for each of its interfaces that has CFM configured, runs CFM and
+ * transmits (and afterwards frees) whatever packet cfm_run() hands back. */
+static void
+port_run(struct port *port)
+{
+    size_t idx;
+
+    lacp_run(port);
+    bond_run(port);
+
+    for (idx = 0; idx < port->n_ifaces; idx++) {
+        struct iface *iface = port->ifaces[idx];
+        struct ofpbuf *pkt;
+
+        if (!iface->cfm) {
+            continue;
+        }
+
+        pkt = cfm_run(iface->cfm);
+        if (!pkt) {
+            continue;
+        }
+
+        /* The returned packet is released here once it has been sent. */
+        iface_send_packet(iface, pkt);
+        ofpbuf_uninit(pkt);
+        free(pkt);
+    }
+}
+
+/* Registers the wakeups needed by port_run() for 'port': those of its LACP
+ * and bonding logic, plus the CFM wait of every interface that has CFM
+ * configured. */
+static void
+port_wait(struct port *port)
+{
+    size_t idx = 0;
+
+    lacp_wait(port);
+    bond_wait(port);
+
+    while (idx < port->n_ifaces) {
+        struct iface *iface = port->ifaces[idx++];
+
+        if (!iface->cfm) {
+            continue;
+        }
+        cfm_wait(iface->cfm);
+    }
+}
+
static struct port *
port_create(struct bridge *br, const char *name)
{
struct port *del;
int i;
- proc_net_compat_update_vlan(port->name, NULL, 0);
- proc_net_compat_update_bond(port->name, NULL);
-
for (i = 0; i < MAX_MIRRORS; i++) {
struct mirror *m = br->mirrors[i];
if (m && m->out_port == port) {
netdev_monitor_destroy(port->monitor);
free(port->ifaces);
bitmap_free(port->trunks);
+ free(port->bond_hash);
free(port->name);
free(port);
bridge_flush(br);
}
if (port->n_ifaces < 2) {
/* Not a bonded port. */
- if (port->bond_hash) {
- free(port->bond_hash);
- port->bond_hash = NULL;
- port->bond_compat_is_stale = true;
- }
-
+ free(port->bond_hash);
+ port->bond_hash = NULL;
port->bond_fake_iface = false;
+ port->active_iface = -1;
+ port->no_ifaces_tag = 0;
} else {
size_t i;
e->iface_idx = -1;
e->tx_bytes = 0;
}
- port->no_ifaces_tag = tag_create_random();
- bond_choose_active_iface(port);
port->bond_next_rebalance
= time_msec() + port->bond_rebalance_interval;
-
- if (port->cfg->bond_fake_iface) {
- port->bond_next_fake_iface_update = time_msec();
- }
} else if (port->bond_mode == BM_AB) {
free(port->bond_hash);
port->bond_hash = NULL;
}
- port->bond_compat_is_stale = true;
- port->bond_fake_iface = port->cfg->bond_fake_iface;
-
- if (!port->miimon) {
- port->monitor = netdev_monitor_create();
- for (i = 0; i < port->n_ifaces; i++) {
- netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
- }
- }
- }
-}
-
-static void
-port_update_bond_compat(struct port *port)
-{
- struct compat_bond_hash compat_hashes[BOND_MASK + 1];
- struct compat_bond bond;
- size_t i;
- if (port->n_ifaces < 2 || port->bond_mode != BM_SLB) {
- proc_net_compat_update_bond(port->name, NULL);
- return;
- }
-
- bond.up = false;
- bond.updelay = port->updelay;
- bond.downdelay = port->downdelay;
-
- bond.n_hashes = 0;
- bond.hashes = compat_hashes;
- if (port->bond_hash) {
- const struct bond_entry *e;
- for (e = port->bond_hash; e <= &port->bond_hash[BOND_MASK]; e++) {
- if (e->iface_idx >= 0 && e->iface_idx < port->n_ifaces) {
- struct compat_bond_hash *cbh = &bond.hashes[bond.n_hashes++];
- cbh->hash = e - port->bond_hash;
- cbh->netdev_name = port->ifaces[e->iface_idx]->name;
- }
+ if (!port->no_ifaces_tag) {
+ port->no_ifaces_tag = tag_create_random();
}
- }
-
- bond.n_slaves = port->n_ifaces;
- bond.slaves = xmalloc(port->n_ifaces * sizeof *bond.slaves);
- for (i = 0; i < port->n_ifaces; i++) {
- struct iface *iface = port->ifaces[i];
- struct compat_bond_slave *slave = &bond.slaves[i];
- slave->name = iface->name;
- /* We need to make the same determination as the Linux bonding
- * code to determine whether a slave should be consider "up".
- * The Linux function bond_miimon_inspect() supports four
- * BOND_LINK_* states:
- *
- * - BOND_LINK_UP: carrier detected, updelay has passed.
- * - BOND_LINK_FAIL: carrier lost, downdelay in progress.
- * - BOND_LINK_DOWN: carrier lost, downdelay has passed.
- * - BOND_LINK_BACK: carrier detected, updelay in progress.
- *
- * The function bond_info_show_slave() only considers BOND_LINK_UP
- * to be "up" and anything else to be "down".
- */
- slave->up = iface->enabled && iface->delay_expires == LLONG_MAX;
- if (slave->up) {
- bond.up = true;
+ if (port->active_iface < 0) {
+ bond_choose_active_iface(port);
}
- netdev_get_etheraddr(iface->netdev, slave->mac);
- }
-
- if (port->bond_fake_iface) {
- struct netdev *bond_netdev;
- if (!netdev_open_default(port->name, &bond_netdev)) {
- if (bond.up) {
- netdev_turn_flags_on(bond_netdev, NETDEV_UP, true);
- } else {
- netdev_turn_flags_off(bond_netdev, NETDEV_UP, true);
- }
- netdev_close(bond_netdev);
+ port->bond_fake_iface = port->cfg->bond_fake_iface;
+ if (port->bond_fake_iface) {
+ port->bond_next_fake_iface_update = time_msec();
}
- }
-
- proc_net_compat_update_bond(port->name, &bond);
- free(bond.slaves);
-}
-static void
-port_update_vlan_compat(struct port *port)
-{
- struct bridge *br = port->bridge;
- char *vlandev_name = NULL;
-
- if (port->vlan > 0) {
- /* Figure out the name that the VLAN device should actually have, if it
- * existed. This takes some work because the VLAN device would not
- * have port->name in its name; rather, it would have the trunk port's
- * name, and 'port' would be attached to a bridge that also had the
- * VLAN device one of its ports. So we need to find a trunk port that
- * includes port->vlan.
- *
- * There might be more than one candidate. This doesn't happen on
- * XenServer, so if it happens we just pick the first choice in
- * alphabetical order instead of creating multiple VLAN devices. */
- size_t i;
- for (i = 0; i < br->n_ports; i++) {
- struct port *p = br->ports[i];
- if (port_trunks_vlan(p, port->vlan)
- && p->n_ifaces
- && (!vlandev_name || strcmp(p->name, vlandev_name) <= 0))
- {
- uint8_t ea[ETH_ADDR_LEN];
- netdev_get_etheraddr(p->ifaces[0]->netdev, ea);
- if (!eth_addr_is_multicast(ea) &&
- !eth_addr_is_reserved(ea) &&
- !eth_addr_is_zero(ea)) {
- vlandev_name = p->name;
- }
+ if (!port->miimon) {
+ port->monitor = netdev_monitor_create();
+ for (i = 0; i < port->n_ifaces; i++) {
+ netdev_monitor_add(port->monitor, port->ifaces[i]->netdev);
}
}
}
- proc_net_compat_update_vlan(port->name, vlandev_name, port->vlan);
}
\f
/* Interface functions. */
return shash_find_data(&br->iface_by_name, name);
}
+/* Searches every bridge on 'all_bridges' for an interface called 'name'.
+ * Returns the first match found, or NULL if no bridge has such an
+ * interface. */
+static struct iface *
+iface_find(const char *name)
+{
+    const struct bridge *br;
+    struct iface *found = NULL;
+
+    LIST_FOR_EACH (br, node, &all_bridges) {
+        found = iface_lookup(br, name);
+        if (found) {
+            break;
+        }
+    }
+    return found;
+}
+
static struct iface *
iface_from_dp_ifidx(const struct bridge *br, uint16_t dp_ifidx)
{