#include "port-array.h"
#include "proc-net-compat.h"
#include "process.h"
+#include "secchan/netflow.h"
#include "secchan/ofproto.h"
#include "socket-util.h"
#include "stp.h"
uint64_t dpid;
struct iface *local_iface = NULL;
const char *devname;
- uint8_t engine_type = br->dpif.minor;
- uint8_t engine_id = br->dpif.minor;
- bool add_id_to_iface = false;
- struct svec nf_hosts;
+ struct netflow_options nf_options;
bridge_fetch_dp_ifaces(br);
for (i = 0; i < br->n_ports; ) {
ofproto_set_datapath_id(br->ofproto, dpid);
/* Set NetFlow configuration on this bridge. */
+ memset(&nf_options, 0, sizeof nf_options);
+ nf_options.engine_type = br->dpif.minor;
+ nf_options.engine_id = br->dpif.minor;
+ nf_options.active_timeout = -1;
+
if (cfg_has("netflow.%s.engine-type", br->name)) {
- engine_type = cfg_get_int(0, "netflow.%s.engine-type",
+ nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type",
br->name);
}
if (cfg_has("netflow.%s.engine-id", br->name)) {
- engine_id = cfg_get_int(0, "netflow.%s.engine-id", br->name);
+ nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id",
+ br->name);
+ }
+ if (cfg_has("netflow.%s.active-timeout", br->name)) {
+ nf_options.active_timeout = cfg_get_int(0,
+ "netflow.%s.active-timeout",
+ br->name);
}
if (cfg_has("netflow.%s.add-id-to-iface", br->name)) {
- add_id_to_iface = cfg_get_bool(0, "netflow.%s.add-id-to-iface",
- br->name);
+ nf_options.add_id_to_iface = cfg_get_bool(0,
+ "netflow.%s.add-id-to-iface",
+ br->name);
}
- if (add_id_to_iface && engine_id > 0x7f) {
+ if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) {
VLOG_WARN("bridge %s: netflow port mangling may conflict with "
"another vswitch, choose an engine id less than 128",
br->name);
}
- if (add_id_to_iface && br->n_ports > 0x1ff) {
+ if (nf_options.add_id_to_iface && br->n_ports > 508) {
VLOG_WARN("bridge %s: netflow port mangling will conflict with "
- "another port when 512 or more ports are used",
+ "another port when more than 508 ports are used",
br->name);
}
- svec_init(&nf_hosts);
- cfg_get_all_keys(&nf_hosts, "netflow.%s.host", br->name);
- if (ofproto_set_netflow(br->ofproto, &nf_hosts, engine_type,
- engine_id, add_id_to_iface)) {
+ svec_init(&nf_options.collectors);
+ cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name);
+ if (ofproto_set_netflow(br->ofproto, &nf_options)) {
VLOG_ERR("bridge %s: problem setting netflow collectors",
br->name);
}
- svec_destroy(&nf_hosts);
+ svec_destroy(&nf_options.collectors);
/* Update the controller and related settings. It would be more
* straightforward to call this from bridge_reconfigure_one(), but we
static size_t
compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
- struct dst dsts[], tag_type *tags)
+ struct dst dsts[], tag_type *tags, uint16_t *nf_output_iface)
{
mirror_mask_t mirrors = in_port->src_mirrors;
struct dst *dst = dsts;
dst++;
}
}
+ *nf_output_iface = NF_OUT_FLOOD;
} else if (out_port && set_dst(dst, flow, in_port, out_port, tags)) {
+ *nf_output_iface = dst->dp_ifidx;
mirrors |= out_port->dst_mirrors;
dst++;
}
static void
compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan,
const struct port *in_port, const struct port *out_port,
- tag_type *tags, struct odp_actions *actions)
+ tag_type *tags, struct odp_actions *actions,
+ uint16_t *nf_output_iface)
{
struct dst dsts[DP_MAX_PORTS * (MAX_MIRRORS + 1)];
size_t n_dsts;
const struct dst *p;
uint16_t cur_vlan;
- n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags);
+ n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags,
+ nf_output_iface);
cur_vlan = ntohs(flow->dl_vlan);
for (p = dsts; p < &dsts[n_dsts]; p++) {
}
static bool
-is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *packet)
+is_bcast_arp_reply(const flow_t *flow)
{
- struct arp_eth_header *arp = (struct arp_eth_header *) packet->data;
return (flow->dl_type == htons(ETH_TYPE_ARP)
- && eth_addr_is_broadcast(flow->dl_dst)
- && packet->size >= sizeof(struct arp_eth_header)
- && arp->ar_op == ARP_OP_REQUEST);
+ && flow->nw_proto == ARP_OP_REPLY
+ && eth_addr_is_broadcast(flow->dl_dst));
}
/* If the composed actions may be applied to any packet in the given 'flow',
static bool
process_flow(struct bridge *br, const flow_t *flow,
const struct ofpbuf *packet, struct odp_actions *actions,
- tag_type *tags)
+ tag_type *tags, uint16_t *nf_output_iface)
{
struct iface *in_iface;
struct port *in_port;
* to this rule: the host has moved to another switch. */
src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan);
if (src_idx != -1 && src_idx != in_port->port_idx &&
- (!packet || !is_bcast_arp_reply(flow, packet))) {
+ !is_bcast_arp_reply(flow)) {
goto done;
}
}
tags);
if (out_port_idx >= 0 && out_port_idx < br->n_ports) {
out_port = br->ports[out_port_idx];
+ } else if (!packet) {
+ /* If we are revalidating but don't have a learning entry then
+ * eject the flow. Installing a flow that floods packets will
+ * prevent us from seeing future packets and learning properly. */
+ return false;
}
}
}
done:
- compose_actions(br, flow, vlan, in_port, out_port, tags, actions);
+ compose_actions(br, flow, vlan, in_port, out_port, tags, actions,
+ nf_output_iface);
- /*
- * We send out only a single packet, instead of setting up a flow, if the
- * packet is an ARP directed to broadcast that arrived on a bonded
- * interface. In such a situation ARP requests and replies must be handled
- * differently, but OpenFlow unfortunately can't distinguish them.
- */
- return (in_port->n_ifaces < 2
- || flow->dl_type != htons(ETH_TYPE_ARP)
- || !eth_addr_is_broadcast(flow->dl_dst));
+ return true;
}
/* Careful: 'opp' is in host byte order and opp->port_no is an OFP port
static bool
bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet,
- struct odp_actions *actions, tag_type *tags, void *br_)
+ struct odp_actions *actions, tag_type *tags,
+ uint16_t *nf_output_iface, void *br_)
{
struct bridge *br = br_;
#endif
COVERAGE_INC(bridge_process_flow);
- return process_flow(br, flow, packet, actions, tags);
+ return process_flow(br, flow, packet, actions, tags, nf_output_iface);
}
static void
/* Shifts 'hash' from 'from' to 'to' within 'port'. */
static void
bond_shift_load(struct slave_balance *from, struct slave_balance *to,
- struct bond_entry *hash)
+ int hash_idx)
{
+ struct bond_entry *hash = from->hashes[hash_idx];
struct port *port = from->iface->port;
uint64_t delta = hash->tx_bytes;
* it require more work, the only purpose it would be to allow that hash to
* be migrated to another slave in this rebalancing run, and there is no
* point in doing that. */
- if (from->hashes[0] == hash) {
+ if (hash_idx == 0) {
from->hashes++;
} else {
- int i = hash - from->hashes[0];
- memmove(from->hashes + i, from->hashes + i + 1,
- (from->n_hashes - (i + 1)) * sizeof *from->hashes);
+ memmove(from->hashes + hash_idx, from->hashes + hash_idx + 1,
+ (from->n_hashes - (hash_idx + 1)) * sizeof *from->hashes);
}
from->n_hashes--;
/* 'from' is carrying significantly more load than 'to', and that
* load is split across at least two different hashes. Pick a hash
* to migrate to 'to' (the least-loaded slave), given that doing so
- * must not cause 'to''s load to exceed 'from''s load.
+ * must decrease the ratio of the load on the two slaves by at
+ * least 0.1.
*
* The sort order we use means that we prefer to shift away the
* smallest hashes instead of the biggest ones. There is little
* reason behind this decision; we could use the opposite sort
* order to shift away big hashes ahead of small ones. */
size_t i;
+ bool order_swapped;
for (i = 0; i < from->n_hashes; i++) {
+ double old_ratio, new_ratio;
uint64_t delta = from->hashes[i]->tx_bytes;
- if (to->tx_bytes + delta < from->tx_bytes - delta) {
+
+ if (delta == 0 || from->tx_bytes - delta == 0) {
+ /* Pointless move. */
+ continue;
+ }
+
+ order_swapped = from->tx_bytes - delta < to->tx_bytes + delta;
+
+ if (to->tx_bytes == 0) {
+ /* Nothing on the new slave, move it. */
+ break;
+ }
+
+ old_ratio = (double)from->tx_bytes / to->tx_bytes;
+ new_ratio = (double)(from->tx_bytes - delta) /
+ (to->tx_bytes + delta);
+
+ if (new_ratio == 0) {
+ /* Should already be covered but check to prevent division
+ * by zero. */
+ continue;
+ }
+
+ if (new_ratio < 1) {
+ new_ratio = 1 / new_ratio;
+ }
+
+ if (old_ratio - new_ratio > 0.1) {
+ /* Would decrease the ratio, move it. */
break;
}
}
if (i < from->n_hashes) {
- bond_shift_load(from, to, from->hashes[i]);
+ bond_shift_load(from, to, i);
+ port->bond_compat_is_stale = true;
+
+ /* If the result of the migration changed the relative order of
+ * 'from' and 'to' swap them back to maintain invariants. */
+ if (order_swapped) {
+ swap_bals(from, to);
+ }
/* Re-sort 'bals'. Note that this may make 'from' and 'to'
* point to different slave_balance structures. It is only
} else {
from++;
}
- port->bond_compat_is_stale = true;
}
}
continue;
}
- ds_put_format(&ds, "\thash %d: %lld kB load\n",
+ ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n",
hash, be->tx_bytes / 1024);
/* MACs. */