#include <poll.h>
#include <regex.h>
#include <stdlib.h>
+#include <signal.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "packets.h"
#include "poll-loop.h"
#include "rconn.h"
+#include "timeval.h"
#include "util.h"
#include "vconn-ssl.h"
#include "vconn.h"
int probe_interval; /* # seconds idle before sending echo request. */
int max_backoff; /* Max # seconds between connection attempts. */
+ /* Packet-in rate-limiting. */
+ int rate_limit; /* Tokens added to bucket per second. */
+    int burst_limit;            /* Maximum token bucket size, in packets. */
+
/* Discovery behavior. */
regex_t accept_controller_regex; /* Controller vconns to accept. */
const char *accept_controller_re; /* String version of regex. */
+/* One direction of a relayed connection: the rconn for one endpoint plus the
+ * packet most recently received from it that still awaits forwarding. */
struct half {
    struct rconn *rconn;
    struct buffer *rxbuf;
+    int n_txq;                  /* No. of packets queued for tx on 'rconn'. */
};
struct relay {
+/* A set of callbacks registered by a secchan extension (in-band control,
+ * fail-open, rate-limiting).  'packet_cb' observes each relayed packet and
+ * returns true to consume it; 'periodic_cb' runs each main-loop iteration;
+ * 'wait_cb' registers poll-loop wakeup conditions.  Any callback may be
+ * null.  'aux' is passed back to every callback. */
struct hook {
    bool (*packet_cb)(struct relay *, int half, void *aux);
    void (*periodic_cb)(void *aux);
+    void (*wait_cb)(void *aux);
    void *aux;
};
static struct hook make_hook(bool (*packet_cb)(struct relay *, int, void *),
void (*periodic_cb)(void *),
+ void (*wait_cb)(void *),
void *aux);
static struct discovery *discovery_init(const struct settings *);
static struct hook fail_open_hook_create(const struct settings *,
struct rconn *local,
struct rconn *remote);
+static struct hook rate_limit_hook_create(const struct settings *,
+ struct rconn *local,
+ struct rconn *remote);
static void modify_dhcp_request(struct dhcp_msg *, void *aux);
static bool validate_dhcp_offer(const struct dhcp_msg *, void *aux);
struct list relays = LIST_INITIALIZER(&relays);
- struct hook hooks[3];
+ struct hook hooks[4];
size_t n_hooks;
struct rconn *local_rconn, *remote_rconn;
set_program_name(argv[0]);
register_fault_handlers();
+ time_init();
vlog_init();
parse_options(argc, argv, &s);
+ signal(SIGPIPE, SIG_IGN);
/* Start listening for management connections. */
if (s.listen_vconn_name) {
daemonize();
+ VLOG_WARN("OpenFlow reference implementation version %s", VERSION);
+ VLOG_WARN("OpenFlow protocol version 0x%02x", OFP_VERSION);
+
/* Connect to datapath. */
- local_rconn = rconn_create(1, 0, s.max_backoff);
+ local_rconn = rconn_create(0, s.max_backoff);
rconn_connect(local_rconn, s.nl_name);
/* Connect to controller. */
- remote_rconn = rconn_create(1, s.probe_interval, s.max_backoff);
+ remote_rconn = rconn_create(s.probe_interval, s.max_backoff);
if (s.controller_name) {
retval = rconn_connect(remote_rconn, s.controller_name);
if (retval == EAFNOSUPPORT) {
hooks[n_hooks++] = fail_open_hook_create(&s,
local_rconn, remote_rconn);
}
+ if (s.rate_limit) {
+ hooks[n_hooks++] = rate_limit_hook_create(&s,
+ local_rconn, remote_rconn);
+ }
assert(n_hooks <= ARRAY_SIZE(hooks));
for (;;) {
if (listen_vconn) {
vconn_accept_wait(listen_vconn);
}
+ for (i = 0; i < n_hooks; i++) {
+ if (hooks[i].wait_cb) {
+ hooks[i].wait_cb(hooks[i].aux);
+ }
+ }
if (discovery) {
discovery_wait(discovery);
}
+/* Composes and returns a struct hook from the given callbacks (any of which
+ * may be null) and auxiliary data 'aux', which is passed to each callback. */
static struct hook
make_hook(bool (*packet_cb)(struct relay *, int half, void *aux),
          void (*periodic_cb)(void *aux),
+          void (*wait_cb)(void *aux),
          void *aux)
{
    struct hook h;
    h.packet_cb = packet_cb;
    h.periodic_cb = periodic_cb;
+    h.wait_cb = wait_cb;
    h.aux = aux;
    return h;
}
}
/* Create and return relay. */
- r1 = rconn_create(1, 0, 0);
+ r1 = rconn_create(0, 0);
rconn_connect_unreliably(r1, nl_name_without_subscription, new_local);
free(nl_name_without_subscription);
- r2 = rconn_create(1, 0, 0);
+ r2 = rconn_create(0, 0);
rconn_connect_unreliably(r2, "passive", new_remote);
return relay_create(r1, r2, true);
+/* Creates and returns a new relay between rconns 'local' and 'remote'.
+ * 'is_mgmt_conn' is true if this relay serves a management connection rather
+ * than the primary controller connection. */
static struct relay *
relay_create(struct rconn *local, struct rconn *remote, bool is_mgmt_conn)
{
-    struct relay *r;
-    int i;
-
-    r = xmalloc(sizeof *r);
+    struct relay *r = xcalloc(1, sizeof *r);
+    /* xcalloc() zeroes the whole struct, so each half's 'rxbuf' and 'n_txq'
+     * start out null/0 without explicit initialization. */
    r->halves[HALF_LOCAL].rconn = local;
    r->halves[HALF_REMOTE].rconn = remote;
-    for (i = 0; i < 2; i++) {
-        r->halves[i].rxbuf = NULL;
-    }
    r->is_mgmt_conn = is_mgmt_conn;
    return r;
}
}
}
- if (this->rxbuf) {
- int retval = rconn_send(peer->rconn, this->rxbuf);
+ if (this->rxbuf && !this->n_txq) {
+ int retval = rconn_send(peer->rconn, this->rxbuf,
+ &this->n_txq);
if (retval != EAGAIN) {
if (!retval) {
progress = true;
struct mac_learning *ml;
struct netdev *of_device;
uint8_t mac[ETH_ADDR_LEN];
+ int n_queued;
};
+/* Enqueues 'b' for transmission on 'rc', tracking the number of queued
+ * messages in 'in_band->n_queued' and passing a queue limit of 10 to
+ * rconn_send_with_limit(). */
static void
-queue_tx(struct rconn *rc, struct buffer *b)
+queue_tx(struct rconn *rc, struct in_band_data *in_band, struct buffer *b)
{
-    if (rconn_force_send(rc, b)) {
-        buffer_delete(b);
-    }
+    rconn_send_with_limit(rc, b, &in_band->n_queued, 10);
}
-static bool
-is_controller_mac(const uint8_t dl_addr[ETH_ADDR_LEN], struct netdev *netdev,
- struct rconn *controller)
+static const uint8_t *
+get_controller_mac(struct netdev *netdev, struct rconn *controller)
{
static uint32_t ip, last_nonzero_ip;
static uint8_t mac[ETH_ADDR_LEN], last_nonzero_mac[ETH_ADDR_LEN];
uint32_t last_ip = ip;
- time_t now = time(0);
+ time_t now = time_now();
ip = rconn_get_ip(controller);
if (last_ip != ip || !next_refresh || now >= next_refresh) {
* Otherwise, we can afford to wait a little while. */
next_refresh = now + (!ip || have_mac ? 10 : 1);
}
- return !eth_addr_is_zero(mac) && eth_addr_equals(mac, dl_addr);
+ return !eth_addr_is_zero(mac) ? mac : NULL;
+}
+
+/* Returns true if 'mac' equals 'controller_mac'.  'controller_mac' may be
+ * null (meaning the controller's MAC is not currently known), in which case
+ * the result is false. */
+static bool
+is_controller_mac(const uint8_t mac[ETH_ADDR_LEN],
+                  const uint8_t *controller_mac)
+{
+    return controller_mac && eth_addr_equals(mac, controller_mac);
+}
static bool
struct buffer pkt;
struct flow flow;
uint16_t in_port, out_port;
+ const uint8_t *controller_mac;
if (half != HALF_LOCAL || r->is_mgmt_conn) {
return false;
flow_extract(&pkt, in_port, &flow);
/* Deal with local stuff. */
+ controller_mac = get_controller_mac(in_band->of_device,
+ r->halves[HALF_REMOTE].rconn);
if (in_port == OFPP_LOCAL) {
+ /* Sent by secure channel. */
out_port = mac_learning_lookup(in_band->ml, flow.dl_dst);
} else if (eth_addr_equals(flow.dl_dst, in_band->mac)) {
+ /* Sent to secure channel. */
out_port = OFPP_LOCAL;
if (mac_learning_learn(in_band->ml, flow.dl_src, in_port)) {
VLOG_DBG("learned that "ETH_ADDR_FMT" is on port %"PRIu16,
}
} else if (flow.dl_type == htons(ETH_TYPE_ARP)
&& eth_addr_is_broadcast(flow.dl_dst)
- && is_controller_mac(flow.dl_src, in_band->of_device,
- r->halves[HALF_REMOTE].rconn)) {
+ && is_controller_mac(flow.dl_src, controller_mac)) {
+ /* ARP sent by controller. */
out_port = OFPP_FLOOD;
+ } else if (is_controller_mac(flow.dl_dst, controller_mac)
+ && in_port == mac_learning_lookup(in_band->ml,
+ controller_mac)) {
+ /* Drop controller traffic that arrives on the controller port. */
+ queue_tx(rc, in_band, make_add_flow(&flow, ntohl(opi->buffer_id),
+ in_band->s->max_idle, 0));
+ return true;
} else {
return false;
}
if (out_port != OFPP_FLOOD) {
/* The output port is known, so add a new flow. */
- queue_tx(rc, make_add_simple_flow(&flow, ntohl(opi->buffer_id),
- out_port, in_band->s->max_idle));
+ queue_tx(rc, in_band,
+ make_add_simple_flow(&flow, ntohl(opi->buffer_id),
+ out_port, in_band->s->max_idle));
/* If the switch didn't buffer the packet, we need to send a copy. */
if (ntohl(opi->buffer_id) == UINT32_MAX) {
- queue_tx(rc, make_unbuffered_packet_out(&pkt, in_port, out_port));
+ queue_tx(rc, in_band,
+ make_unbuffered_packet_out(&pkt, in_port, out_port));
}
} else {
/* We don't know that MAC. Send along the packet without setting up a
b = make_buffered_packet_out(ntohl(opi->buffer_id),
in_port, out_port);
}
- queue_tx(rc, b);
+ queue_tx(rc, in_band, b);
}
return true;
}
struct in_band_data *in_band;
int retval;
- in_band = xmalloc(sizeof *in_band);
+ in_band = xcalloc(1, sizeof *in_band);
in_band->s = s;
in_band->ml = mac_learning_create();
retval = netdev_open(s->of_name, NETDEV_ETH_TYPE_NONE,
memcpy(in_band->mac, netdev_get_etheraddr(in_band->of_device),
ETH_ADDR_LEN);
- return make_hook(in_band_packet_cb, NULL, in_band);
+ return make_hook(in_band_packet_cb, NULL, NULL, in_band);
}
\f
/* Fail open support. */
fail_open->local_rconn = local_rconn;
fail_open->remote_rconn = remote_rconn;
fail_open->lswitch = NULL;
- return make_hook(fail_open_packet_cb, fail_open_periodic_cb, fail_open);
+ return make_hook(fail_open_packet_cb, fail_open_periodic_cb, NULL,
+ fail_open);
+}
+\f
+/* State for the packet-in rate-limiting hook. */
+struct rate_limiter {
+    const struct settings *s;   /* Source of rate_limit and burst_limit. */
+    struct rconn *remote_rconn; /* Connection to the controller. */
+
+    /* One queue per physical port. */
+    struct queue queues[OFPP_MAX];
+    int n_queued;               /* Sum over queues[*].n. */
+    int next_tx_port;           /* Next port to check in round-robin. */
+
+    /* Token bucket.
+     *
+     * It costs 1000 tokens to send a single packet_in message.  A single
+     * token per message would be more straightforward, but this choice lets
+     * us avoid round-off error in refill_bucket()'s calculation of how many
+     * tokens to add to the bucket, since no division step is needed. */
+    long long int last_fill;    /* Time at which we last added tokens. */
+    int tokens;                 /* Current number of tokens. */
+
+    /* Transmission queue. */
+    int n_txq;                  /* No. of packets waiting in rconn for tx. */
+};
+
+/* Drops one packet from the longest queue in 'rl', breaking ties uniformly
+ * at random among equally long queues, and decrements 'rl->n_queued'. */
+static void
+drop_packet(struct rate_limiter *rl)
+{
+    struct queue *longest;      /* Queue currently selected as longest. */
+    int n_longest;              /* # of queues of same length as 'longest'. */
+    struct queue *q;
+
+    longest = &rl->queues[0];
+    n_longest = 1;
+    /* Start the scan at queues[1]: queues[0] is already accounted for as the
+     * initial candidate.  Starting at queues[0] would compare it against
+     * itself, bump 'n_longest' to 2, and enter it into the reservoir twice,
+     * biasing the tie-break toward port 0. */
+    for (q = &rl->queues[1]; q < &rl->queues[OFPP_MAX]; q++) {
+        if (longest->n < q->n) {
+            longest = q;
+            n_longest = 1;
+        } else if (longest->n == q->n) {
+            n_longest++;
+
+            /* Randomly select one of the longest queues, with a uniform
+             * distribution (Knuth algorithm 3.4.2R). */
+            if (!random_range(n_longest)) {
+                longest = q;
+            }
+        }
+    }
+
+    /* FIXME: do we want to pop the tail instead? */
+    buffer_delete(queue_pop_head(longest));
+    rl->n_queued--;
+}
+
+/* Removes and returns the next queued packet, scanning the per-port queues
+ * in round-robin order starting from 'rl->next_tx_port'.  The caller must
+ * ensure at least one packet is queued (rl->n_queued > 0). */
+static struct buffer *
+dequeue_packet(struct rate_limiter *rl)
+{
+    unsigned int offset;
+
+    for (offset = 0; offset < OFPP_MAX; offset++) {
+        unsigned int port_no = (rl->next_tx_port + offset) % OFPP_MAX;
+        struct queue *port_queue = &rl->queues[port_no];
+        if (port_queue->n > 0) {
+            /* Resume the scan after this port next time, so every port gets
+             * a fair share of transmissions. */
+            rl->next_tx_port = (port_no + 1) % OFPP_MAX;
+            rl->n_queued--;
+            return queue_pop_head(port_queue);
+        }
+    }
+    NOT_REACHED();
+}
+
+/* Adds tokens to the bucket based on the time elapsed since the last
+ * refill, at 'rate_limit' tokens per millisecond, capped at
+ * burst_limit * 1000 tokens. */
+static void
+refill_bucket(struct rate_limiter *rl)
+{
+    const struct settings *s = rl->s;
+    long long int now = time_msec();
+    long long int tokens = (now - rl->last_fill) * s->rate_limit + rl->tokens;
+    if (tokens >= 1000) {
+        /* Only commit the refill once at least one packet's worth (1000
+         * tokens) has accumulated.  Leaving 'last_fill' untouched otherwise
+         * preserves the credit from short intervals instead of repeatedly
+         * discarding sub-packet amounts. */
+        rl->last_fill = now;
+        rl->tokens = MIN(tokens, s->burst_limit * 1000);
+    }
+}
+
+/* Attempts to remove the 1000 tokens that transmitting one packet_in
+ * message costs from 'rl'.  Returns true on success; returns false, with no
+ * tokens removed, if the bucket holds fewer than 1000 tokens. */
+static bool
+get_token(struct rate_limiter *rl)
+{
+    if (rl->tokens < 1000) {
+        return false;
+    }
+    rl->tokens -= 1000;
+    return true;
+}
+
+/* Hook packet callback for rate-limiting.  If the message received from the
+ * datapath is a packet_in and we are currently constrained by the rate
+ * limit (or packets are already queued), queues a clone of the message on
+ * the per-port queue and returns true so the relay does not forward the
+ * original.  Returns false to let all other messages take the normal path. */
+static bool
+rate_limit_packet_cb(struct relay *r, int half, void *rl_)
+{
+    struct rate_limiter *rl = rl_;
+    const struct settings *s = rl->s;
+    struct buffer *msg = r->halves[HALF_LOCAL].rxbuf;
+    struct ofp_header *oh;
+
+    if (half == HALF_REMOTE) {
+        /* Only traffic from the datapath to the controller is limited. */
+        return false;
+    }
+
+    oh = msg->data;
+    if (oh->type != OFPT_PACKET_IN) {
+        return false;
+    }
+    if (msg->size < offsetof(struct ofp_packet_in, data)) {
+        VLOG_WARN("packet too short (%zu bytes) for packet_in", msg->size);
+        return false;
+    }
+
+    if (!rl->n_queued && get_token(rl)) {
+        /* In the common case where we are not constrained by the rate limit,
+         * let the packet take the normal path. */
+        return false;
+    } else {
+        /* Otherwise queue it up for the periodic callback to drain out. */
+        struct ofp_packet_in *opi = msg->data;
+        int port = ntohs(opi->in_port) % OFPP_MAX;
+        if (rl->n_queued >= s->burst_limit) {
+            /* Total queued packets are capped at burst_limit; make room by
+             * dropping a packet from the longest queue. */
+            drop_packet(rl);
+        }
+        queue_push_tail(&rl->queues[port], buffer_clone(msg));
+        rl->n_queued++;
+        return true;
+    }
+}
+
+/* Hook periodic callback for rate-limiting: refills the token bucket and
+ * transmits queued packet_in messages to the controller as tokens allow. */
+static void
+rate_limit_periodic_cb(void *rl_)
+{
+    struct rate_limiter *rl = rl_;
+    int i;
+
+    /* Drain some packets out of the bucket if possible, but limit the number
+     * of iterations to allow other code to get work done too. */
+    refill_bucket(rl);
+    for (i = 0; rl->n_queued && get_token(rl) && i < 50; i++) {
+        /* Use a small, arbitrary limit for the amount of queuing to do here,
+         * because the TCP connection is responsible for buffering and there is
+         * no point in trying to transmit faster than the TCP connection can
+         * handle. */
+        struct buffer *b = dequeue_packet(rl);
+        rconn_send_with_limit(rl->remote_rconn, b, &rl->n_txq, 10);
+    }
+}
+
+/* Hook wait callback for rate-limiting: arranges for the poll loop to wake
+ * up when queued packet_in messages can make further progress. */
+static void
+rate_limit_wait_cb(void *rl_)
+{
+    struct rate_limiter *rl = rl_;
+    if (rl->n_queued) {
+        if (rl->tokens >= 1000) {
+            /* We can transmit more packets as soon as we're called again. */
+            poll_immediate_wake();
+        } else {
+            /* We have to wait for the bucket to re-fill.  We could calculate
+             * the exact amount of time here for increased smoothness. */
+            poll_timer_wait(TIME_UPDATE_INTERVAL / 2);
+        }
+    }
+}
+
+/* Creates and returns a hook that rate-limits the packet_in messages
+ * relayed from the local datapath to the controller connection 'remote',
+ * according to the --rate-limit and --burst-limit values in 's'.  'local'
+ * is unused. */
+static struct hook
+rate_limit_hook_create(const struct settings *s,
+                       struct rconn *local,
+                       struct rconn *remote)
+{
+    struct rate_limiter *rl;
+    size_t i;
+
+    rl = xcalloc(1, sizeof *rl);
+    rl->s = s;
+    rl->remote_rconn = remote;
+    for (i = 0; i < ARRAY_SIZE(rl->queues); i++) {
+        queue_init(&rl->queues[i]);
+    }
+    rl->last_fill = time_msec();
+    /* Seed the bucket with 100 ms worth of tokens, but never more than the
+     * bucket capacity of burst_limit * 1000 tokens that refill_bucket()
+     * enforces.  Do the arithmetic in long long so that a very large
+     * --rate-limit cannot overflow 'int'; the MIN() result always fits,
+     * since burst_limit is clamped to INT_MAX / 1000. */
+    rl->tokens = MIN((long long) s->rate_limit * 100,
+                     (long long) s->burst_limit * 1000);
+    return make_hook(rate_limit_packet_cb, rate_limit_periodic_cb,
+                     rate_limit_wait_cb, rl);
}
\f
/* Controller discovery. */
OPT_NO_RESOLV_CONF,
OPT_INACTIVITY_PROBE,
OPT_MAX_IDLE,
- OPT_MAX_BACKOFF
+ OPT_MAX_BACKOFF,
+ OPT_RATE_LIMIT,
+ OPT_BURST_LIMIT
};
static struct option long_options[] = {
{"accept-vconn", required_argument, 0, OPT_ACCEPT_VCONN},
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
{"max-backoff", required_argument, 0, OPT_MAX_BACKOFF},
{"listen", required_argument, 0, 'l'},
+ {"rate-limit", optional_argument, 0, OPT_RATE_LIMIT},
+ {"burst-limit", required_argument, 0, OPT_BURST_LIMIT},
{"detach", no_argument, 0, 'D'},
{"pidfile", optional_argument, 0, 'P'},
{"verbose", optional_argument, 0, 'v'},
s->probe_interval = 15;
s->max_backoff = 15;
s->update_resolv_conf = true;
+ s->rate_limit = 0;
+ s->burst_limit = 0;
for (;;) {
int c;
}
break;
+ case OPT_RATE_LIMIT:
+ if (optarg) {
+ s->rate_limit = atoi(optarg);
+ if (s->rate_limit < 1) {
+ fatal(0, "--rate-limit argument must be at least 1");
+ }
+ } else {
+ s->rate_limit = 1000;
+ }
+ break;
+
+ case OPT_BURST_LIMIT:
+ s->burst_limit = atoi(optarg);
+ if (s->burst_limit < 1) {
+ fatal(0, "--burst-limit argument must be at least 1");
+ }
+ break;
+
case 'D':
set_detach();
break;
netdev_close(netdev);
}
+
+ /* Rate limiting. */
+ if (s->rate_limit) {
+ if (s->rate_limit < 100) {
+ VLOG_WARN("Rate limit set to unusually low value %d",
+ s->rate_limit);
+ }
+ if (!s->burst_limit) {
+ s->burst_limit = s->rate_limit / 4;
+ }
+ s->burst_limit = MAX(s->burst_limit, 1);
+ s->burst_limit = MIN(s->burst_limit, INT_MAX / 1000);
+ }
}
static void
" attempts (default: 15 seconds)\n"
" -l, --listen=METHOD allow management connections on METHOD\n"
" (a passive OpenFlow connection method)\n"
+ "\nRate-limiting of \"packet-in\" messages to the controller:\n"
+ " --rate-limit[=PACKETS] max rate, in packets/s (default: 1000)\n"
+ " --burst-limit=BURST limit on packet credit for idle time\n"
"\nOther options:\n"
" -D, --detach run in background as daemon\n"
" -P, --pidfile[=FILE] create pidfile (default: %s/secchan.pid)\n"