From: Ben Pfaff Date: Wed, 16 Sep 2009 22:12:19 +0000 (-0700) Subject: secchan: Better tolerate failing controller admission control in fail-open. X-Git-Tag: v0.90.6~12 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=7778bd15dacc1e410b60ff6ec2996c475a875e6e;p=sliver-openvswitch.git secchan: Better tolerate failing controller admission control in fail-open. When the switch is configured to connect to a controller that accepts connections, waits a few seconds, and then disconnects without setting up flows, currently this causes "fail-open" to flush the flow table and stop setting up new flows during the connection duration. This is OK if it happens once, but it can easily happen every 8 seconds with typical backoff settings, and that isn't so great. This commit changes fail-open to only flush the flow table once the switch appears to have been admitted by the controller, which prevents these frequent network interruptions. Thanks to Jesse Gross for especially valuable feedback. QA notes: Behavior in fail-open and especially behavior with a controller that rejects the switch after it connects needs to be re-tested. The ovs-controller --mute switch added by this commit is one simple way to create such a controller. CC: Peter Balland Bug #1695. Bug #2055. --- diff --git a/lib/rconn.c b/lib/rconn.c index be030d4a4..c0bc95d8f 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -499,7 +499,7 @@ rconn_recv(struct rconn *rc) int error = vconn_recv(rc->vconn, &buffer); if (!error) { copy_to_monitor(rc, buffer); - if (is_admitted_msg(buffer) + if (rc->probably_admitted || is_admitted_msg(buffer) || time_now() - rc->last_connected >= 30) { rc->probably_admitted = true; rc->last_admitted = time_now(); @@ -637,15 +637,22 @@ rconn_is_connected(const struct rconn *rconn) return is_connected_state(rconn->state); } -/* Returns 0 if 'rconn' is connected. Otherwise, if 'rconn' is in a "failure - * mode" (that is, it is not connected), returns the number of seconds that it - * has been in failure mode, ignoring any times that it connected but the - * controller's admission control policy caused it to be quickly - * disconnected. */ +/* Returns true if 'rconn' is connected and thought to have been accepted by + * the peer's admission-control policy. */ +bool +rconn_is_admitted(const struct rconn *rconn) +{ + return (rconn_is_connected(rconn) + && rconn->last_admitted >= rconn->last_connected); +} + +/* Returns 0 if 'rconn' is currently connected and considered to have been + * accepted by the peer's admission-control policy, otherwise the number of + * seconds since 'rconn' was last in such a state. */ int rconn_failure_duration(const struct rconn *rconn) { - return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted; + return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted; } /* Returns the IP address of the peer, or 0 if the peer's IP address is not diff --git a/lib/rconn.h b/lib/rconn.h index ed0780a3f..ef4e16c8b 100644 --- a/lib/rconn.h +++ b/lib/rconn.h @@ -69,6 +69,7 @@ void rconn_add_monitor(struct rconn *, struct vconn *); const char *rconn_get_name(const struct rconn *); bool rconn_is_alive(const struct rconn *); bool rconn_is_connected(const struct rconn *); +bool rconn_is_admitted(const struct rconn *); int rconn_failure_duration(const struct rconn *); bool rconn_is_connectivity_questionable(struct rconn *); diff --git a/secchan/fail-open.c b/secchan/fail-open.c index 60890d496..48f706945 100644 --- a/secchan/fail-open.c +++ b/secchan/fail-open.c @@ -21,38 +21,102 @@ #include "flow.h" #include "mac-learning.h" #include "odp-util.h" +#include "ofpbuf.h" #include "ofproto.h" +#include "pktbuf.h" +#include "poll-loop.h" #include "rconn.h" #include "status.h" #include "timeval.h" +#include "vconn.h" #define THIS_MODULE VLM_fail_open #include "vlog.h" +/* + * Fail-open mode. + * + * In fail-open mode, the switch detects when the controller cannot be + * contacted or when the controller is dropping switch connections because the + * switch does not pass its admission control policy. In those situations the + * switch sets up flows itself using the "normal" action. + * + * There is a little subtlety to implementation, to properly handle the case + * where the controller allows switch connections but drops them a few seconds + * later for admission control reasons. Because of this case, we don't want to + * just stop setting up flows when we connect to the controller: if we did, + * then new flow setup and existing flows would stop during the duration of + * connection to the controller, and thus the whole network would go down for + * that period of time. + * + * So, instead, we add some special caseswhen we are connected to a controller, + * but not yet sure that it has admitted us: + * + * - We set up flows immediately ourselves, but simultaneously send out an + * OFPT_PACKET_IN to the controller. We put a special bogus buffer-id in + * these OFPT_PACKET_IN messages so that duplicate packets don't get sent + * out to the network when the controller replies. + * + * - We also send out OFPT_PACKET_IN messages for totally bogus packets + * every so often, in case no real new flows are arriving in the network. + * + * - We don't flush the flow table at the time we connect, because this + * could cause network stuttering in a switch with lots of flows or very + * high-bandwidth flows by suddenly throwing lots of packets down to + * userspace. + */ + struct fail_open { struct ofproto *ofproto; struct rconn *controller; int trigger_duration; int last_disconn_secs; struct status_category *ss_cat; + long long int next_bogus_packet_in; + struct rconn_packet_counter *bogus_packet_counter; }; -/* Causes the switch to enter or leave fail-open mode, if appropriate. */ -void -fail_open_run(struct fail_open *fo) +/* Returns true if 'fo' should be in fail-open mode, otherwise false. */ +static inline bool +should_fail_open(const struct fail_open *fo) { - int disconn_secs = rconn_failure_duration(fo->controller); - bool open = disconn_secs >= fo->trigger_duration; - if (open != (fo->last_disconn_secs != 0)) { - if (!open) { - flow_t flow; + return rconn_failure_duration(fo->controller) >= fo->trigger_duration; +} + +/* Returns true if 'fo' is currently in fail-open mode, otherwise false. */ +bool +fail_open_is_active(const struct fail_open *fo) +{ + return fo->last_disconn_secs != 0; +} - VLOG_WARN("No longer in fail-open mode"); - fo->last_disconn_secs = 0; +static void +send_bogus_packet_in(struct fail_open *fo) +{ + uint8_t mac[ETH_ADDR_LEN]; + struct ofpbuf *opi; + struct ofpbuf b; - memset(&flow, 0, sizeof flow); - ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, 70000); - } else { + /* Compose ofp_packet_in. */ + ofpbuf_init(&b, 128); + eth_addr_random(mac); + compose_benign_packet(&b, "Open vSwitch Controller Probe", 0xa033, mac); + opi = make_packet_in(pktbuf_get_null(), OFPP_LOCAL, OFPR_NO_MATCH, &b, 64); + ofpbuf_uninit(&b); + + /* Send. */ + rconn_send_with_limit(fo->controller, opi, fo->bogus_packet_counter, 1); +} + +/* Enter fail-open mode if we should be in it. Handle reconnecting to a + * controller from fail-open mode. */ +void +fail_open_run(struct fail_open *fo) +{ + /* Enter fail-open mode if 'fo' is not in it but should be. */ + if (should_fail_open(fo)) { + int disconn_secs = rconn_failure_duration(fo->controller); + if (!fail_open_is_active(fo)) { VLOG_WARN("Could not connect to controller (or switch failed " "controller's post-connection admission control " "policy) for %d seconds, failing open", disconn_secs); @@ -62,18 +126,53 @@ fail_open_run(struct fail_open *fo) * fail-open rule from fail_open_flushed() when * ofproto_flush_flows() calls back to us. */ ofproto_flush_flows(fo->ofproto); + } else if (disconn_secs > fo->last_disconn_secs + 60) { + VLOG_INFO("Still in fail-open mode after %d seconds disconnected " + "from controller", disconn_secs); + fo->last_disconn_secs = disconn_secs; } - } else if (open && disconn_secs > fo->last_disconn_secs + 60) { - VLOG_INFO("Still in fail-open mode after %d seconds disconnected " - "from controller", disconn_secs); - fo->last_disconn_secs = disconn_secs; } + + /* Schedule a bogus packet-in if we're connected and in fail-open. */ + if (fail_open_is_active(fo)) { + if (rconn_is_connected(fo->controller)) { + bool expired = time_msec() >= fo->next_bogus_packet_in; + if (expired) { + send_bogus_packet_in(fo); + } + if (expired || fo->next_bogus_packet_in == LLONG_MAX) { + fo->next_bogus_packet_in = time_msec() + 2000; + } + } else { + fo->next_bogus_packet_in = LLONG_MAX; + } + } + } +/* If 'fo' is currently in fail-open mode and its rconn has connected to the + * controller, exits fail open mode. */ void -fail_open_wait(struct fail_open *fo UNUSED) +fail_open_maybe_recover(struct fail_open *fo) { - /* Nothing to do. */ + if (fail_open_is_active(fo) && rconn_is_admitted(fo->controller)) { + flow_t flow; + + VLOG_WARN("No longer in fail-open mode"); + fo->last_disconn_secs = 0; + fo->next_bogus_packet_in = LLONG_MAX; + + memset(&flow, 0, sizeof flow); + ofproto_delete_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY); + } +} + +void +fail_open_wait(struct fail_open *fo) +{ + if (fo->next_bogus_packet_in != LLONG_MAX) { + poll_timer_wait(fo->next_bogus_packet_in - time_msec()); + } } void @@ -92,7 +191,7 @@ fail_open_flushed(struct fail_open *fo) action.output.len = htons(sizeof action); action.output.port = htons(OFPP_NORMAL); memset(&flow, 0, sizeof flow); - ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, 70000, + ofproto_add_flow(fo->ofproto, &flow, OFPFW_ALL, FAIL_OPEN_PRIORITY, &action, 1, 0); } } @@ -121,6 +220,8 @@ fail_open_create(struct ofproto *ofproto, fo->last_disconn_secs = 0; fo->ss_cat = switch_status_register(switch_status, "fail-open", fail_open_status_cb, fo); + fo->next_bogus_packet_in = LLONG_MAX; + fo->bogus_packet_counter = rconn_packet_counter_create(); return fo; } @@ -136,6 +237,7 @@ fail_open_destroy(struct fail_open *fo) if (fo) { /* We don't own fo->controller. */ switch_status_unregister(fo->ss_cat); + rconn_packet_counter_destroy(fo->bogus_packet_counter); free(fo); } } diff --git a/secchan/fail-open.h b/secchan/fail-open.h index c0ada2eba..900d587e4 100644 --- a/secchan/fail-open.h +++ b/secchan/fail-open.h @@ -26,13 +26,21 @@ struct ofproto; struct rconn; struct switch_status; +/* Priority of the rule added by the fail-open subsystem when a switch enters + * fail-open mode. This priority value uniquely identifies a fail-open flow + * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch + * creates flows with this priority). */ +#define FAIL_OPEN_PRIORITY 70000 + struct fail_open *fail_open_create(struct ofproto *, int trigger_duration, struct switch_status *, struct rconn *controller); void fail_open_set_trigger_duration(struct fail_open *, int trigger_duration); void fail_open_destroy(struct fail_open *); void fail_open_wait(struct fail_open *); +bool fail_open_is_active(const struct fail_open *); void fail_open_run(struct fail_open *); +void fail_open_maybe_recover(struct fail_open *); void fail_open_flushed(struct fail_open *); #endif /* fail-open.h */ diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 72adbc4c5..de1252a36 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -811,9 +811,6 @@ ofproto_run1(struct ofproto *p) } } } - if (p->fail_open) { - fail_open_run(p->fail_open); - } pinsched_run(p->miss_sched, send_packet_in_miss, p); pinsched_run(p->action_sched, send_packet_in_action, p); if (p->executer) { @@ -825,6 +822,12 @@ ofproto_run1(struct ofproto *p) ofconn_run(ofconn, p); } + /* Fail-open maintenance. Do this after processing the ofconns since + * fail-open checks the status of the controller rconn. */ + if (p->fail_open) { + fail_open_run(p->fail_open); + } + for (i = 0; i < p->n_listeners; i++) { struct vconn *vconn; int retval; @@ -1352,6 +1355,9 @@ ofconn_run(struct ofconn *ofconn, struct ofproto *p) if (!of_msg) { break; } + if (p->fail_open) { + fail_open_maybe_recover(p->fail_open); + } handle_openflow(ofconn, p, of_msg); ofpbuf_delete(of_msg); } @@ -2162,7 +2168,7 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn, if (opo->buffer_id != htonl(UINT32_MAX)) { error = pktbuf_retrieve(ofconn->pktbuf, ntohl(opo->buffer_id), &buffer, &in_port); - if (error) { + if (error || !buffer) { return error; } payload = *buffer; @@ -3078,7 +3084,23 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet) rule_execute(p, rule, &payload, &flow); rule_reinstall(p, rule); - ofpbuf_delete(packet); + + if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY + && rconn_is_connected(p->controller->rconn)) { + /* + * Extra-special case for fail-open mode. + * + * We are in fail-open mode and the packet matched the fail-open rule, + * but we are connected to a controller too. We should send the packet + * up to the controller in the hope that it will try to set up a flow + * and thereby allow us to exit fail-open. + * + * See the top-level comment in fail-open.c for more information. + */ + pinsched_send(p->miss_sched, in_port, packet, send_packet_in_miss, p); + } else { + ofpbuf_delete(packet); + } } static void @@ -3302,6 +3324,7 @@ static void send_packet_in_miss(struct ofpbuf *packet, void *p_) { struct ofproto *p = p_; + bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open); struct ofconn *ofconn; struct ofpbuf payload; struct odp_msg *msg; @@ -3311,8 +3334,10 @@ send_packet_in_miss(struct ofpbuf *packet, void *p_) payload.size = msg->length - sizeof *msg; LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { if (ofconn->miss_send_len) { - uint32_t buffer_id = pktbuf_save(ofconn->pktbuf, &payload, - msg->port); + struct pktbuf *pb = ofconn->pktbuf; + uint32_t buffer_id = (in_fail_open + ? pktbuf_get_null() + : pktbuf_save(pb, &payload, msg->port)); int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len : UINT32_MAX); do_send_packet_in(ofconn, buffer_id, packet, send_len); diff --git a/secchan/pktbuf.c b/secchan/pktbuf.c index b4198a8f6..450cc3b6f 100644 --- a/secchan/pktbuf.c +++ b/secchan/pktbuf.c @@ -51,6 +51,7 @@ struct packet { struct pktbuf { struct packet packets[PKTBUF_CNT]; unsigned int buffer_idx; + unsigned int null_idx; }; int @@ -78,6 +79,22 @@ pktbuf_destroy(struct pktbuf *pb) } } +static unsigned int +make_id(unsigned int buffer_idx, unsigned int cookie) +{ + return buffer_idx | (cookie << PKTBUF_BITS); +} + +/* Attempts to allocate an OpenFlow packet buffer id within 'pb'. The packet + * buffer will store a copy of 'buffer' and the port number 'in_port', which + * should be the datapath port number on which 'buffer' was received. + * + * If successful, returns the packet buffer id (a number other than + * UINT32_MAX). pktbuf_retrieve() can later be used to retrieve the buffer and + * its input port number (buffers do expire after a time, so this is not + * guaranteed to be true forever). On failure, returns UINT32_MAX. + * + * The caller retains ownership of 'buffer'. */ uint32_t pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port) { @@ -97,9 +114,46 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port) p->buffer = ofpbuf_clone(buffer); p->timeout = time_msec() + OVERWRITE_MSECS; p->in_port = in_port; - return (p - pb->packets) | (p->cookie << PKTBUF_BITS); + return make_id(p - pb->packets, p->cookie); +} + +/* + * Allocates and returns a "null" packet buffer id. The returned packet buffer + * id is considered valid by pktbuf_retrieve(), but it is not associated with + * actual buffered data. + * + * This function is always successful. + * + * This is useful in one special case: with the current OpenFlow design, the + * "fail-open" code cannot always know whether a connection to a controller is + * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request, + * but at that point the packet in question has already been forwarded (since + * we are still in "fail-open" mode). If the packet was buffered in the usual + * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate + * packet in the network. Null packet buffer ids identify such a packet that + * has already been forwarded, so that Open vSwitch can quietly ignore the + * request to re-send it. (After that happens, the switch exits fail-open + * mode.) + * + * See the top-level comment in fail-open.c for an overview. + */ +uint32_t +pktbuf_get_null(void) +{ + return make_id(0, COOKIE_MAX); } +/* Attempts to retrieve a saved packet with the given 'id' from 'pb'. Returns + * 0 if successful, otherwise an OpenFlow error code constructed with + * ofp_mkerr(). + * + * On success, ordinarily stores the buffered packet in '*bufferp' and the + * datapath port number on which the packet was received in '*in_port'. The + * caller becomes responsible for freeing the buffer. However, if 'id' + * identifies a "null" packet buffer (created with pktbuf_get_null()), stores + * NULL in '*bufferp' and -1 in '*in_port'. + * + * On failure, stores NULL in in '*bufferp' and -1 in '*in_port'. */ int pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp, uint16_t *in_port) @@ -128,11 +182,16 @@ pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp, VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id); error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BUFFER_EMPTY); } - } else { + } else if (id >> PKTBUF_BITS != COOKIE_MAX) { COVERAGE_INC(pktbuf_bad_cookie); VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32, id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS)); error = ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_COOKIE); + } else { + COVERAGE_INC(pktbuf_null_cookie); + VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal " + "if the switch was recently in fail-open mode)", id); + error = 0; } *bufferp = NULL; *in_port = -1; diff --git a/secchan/pktbuf.h b/secchan/pktbuf.h index b27b7490a..67f4973c2 100644 --- a/secchan/pktbuf.h +++ b/secchan/pktbuf.h @@ -27,6 +27,7 @@ int pktbuf_capacity(void); struct pktbuf *pktbuf_create(void); void pktbuf_destroy(struct pktbuf *); uint32_t pktbuf_save(struct pktbuf *, struct ofpbuf *buffer, uint16_t in_port); +uint32_t pktbuf_get_null(void); int pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp, uint16_t *in_port); void pktbuf_discard(struct pktbuf *, uint32_t id); diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in index 750fcea5f..43bc43bcb 100644 --- a/utilities/ovs-controller.8.in +++ b/utilities/ovs-controller.8.in @@ -113,6 +113,13 @@ through the controller and every packet is flooded. This option is most useful for debugging. It reduces switching performance, so it should not be used in production. +.IP "\fB--mute\fR" +Prevents ovs\-controller from replying to any OpenFlow messages sent +to it by switches. +.IP +This option is only for debugging the Open vSwitch implementation of +``fail open'' mode. It must not be used in production. + .so lib/daemon.man .so lib/vlog.man .so lib/common.man diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c index 010cad794..314da1868 100644 --- a/utilities/ovs-controller.c +++ b/utilities/ovs-controller.c @@ -58,6 +58,10 @@ static bool setup_flows = true; /* --max-idle: Maximum idle time, in seconds, before flows expire. */ static int max_idle = 60; +/* --mute: If true, accept connections from switches but do not reply to any + * of their messages (for debugging fail-open mode). */ +static bool mute = false; + static int do_switching(struct switch_ *); static void new_switch(struct switch_ *, struct vconn *, const char *name); static void parse_options(int argc, char *argv[]); @@ -211,7 +215,9 @@ do_switching(struct switch_ *sw) msg = rconn_recv(sw->rconn); if (msg) { - lswitch_process_packet(sw->lswitch, sw->rconn, msg); + if (!mute) { + lswitch_process_packet(sw->lswitch, sw->rconn, msg); + } ofpbuf_delete(msg); } rconn_run(sw->rconn); @@ -227,12 +233,14 @@ parse_options(int argc, char *argv[]) enum { OPT_MAX_IDLE = UCHAR_MAX + 1, OPT_PEER_CA_CERT, + OPT_MUTE, VLOG_OPTION_ENUMS }; static struct option long_options[] = { {"hub", no_argument, 0, 'H'}, {"noflow", no_argument, 0, 'n'}, {"max-idle", required_argument, 0, OPT_MAX_IDLE}, + {"mute", no_argument, 0, OPT_MUTE}, {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, DAEMON_LONG_OPTIONS, @@ -263,6 +271,10 @@ parse_options(int argc, char *argv[]) setup_flows = false; break; + case OPT_MUTE: + mute = true; + break; + case OPT_MAX_IDLE: if (!strcmp(optarg, "permanent")) { max_idle = OFP_FLOW_PERMANENT;