X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Frconn.c;h=aa8b7e305a65d1825746654f87879a5d4e873224;hb=e0edde6fee279cdbbf3c179f5f50adaf0c7c7f1e;hp=1301f25cac2822a427c689ef8f9b4c2634fe9601;hpb=193456d581423f894e57e8463ff5049c0d802f0a;p=sliver-openvswitch.git diff --git a/lib/rconn.c b/lib/rconn.c index 1301f25ca..aa8b7e305 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include "coverage.h" +#include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" @@ -29,10 +30,15 @@ #include "timeval.h" #include "util.h" #include "vconn.h" - -#define THIS_MODULE VLM_rconn #include "vlog.h" +VLOG_DEFINE_THIS_MODULE(rconn); + +COVERAGE_DEFINE(rconn_discarded); +COVERAGE_DEFINE(rconn_overflow); +COVERAGE_DEFINE(rconn_queued); +COVERAGE_DEFINE(rconn_sent); + #define STATES \ STATE(VOID, 1 << 0) \ STATE(BACKOFF, 1 << 1) \ @@ -64,18 +70,21 @@ struct rconn { time_t state_entered; struct vconn *vconn; - char *name; + char *name; /* Human-readable descriptive name. */ + char *target; /* vconn name, passed to vconn_open(). */ bool reliable; - struct ovs_queue txq; + struct list txq; /* Contains "struct ofpbuf"s. */ int backoff; int max_backoff; time_t backoff_deadline; time_t last_received; time_t last_connected; + time_t last_disconnected; unsigned int packets_sent; unsigned int seqno; + int last_error; /* In S_ACTIVE and S_IDLE, probably_admitted reports whether we believe * that the peer has made a (positive) admission control decision on our @@ -89,28 +98,31 @@ struct rconn { time_t last_admitted; /* These values are simply for statistics reporting, not used directly by - * anything internal to the rconn (or the secchan for that matter). 
*/ + * anything internal to the rconn (or ofproto for that matter). */ unsigned int packets_received; unsigned int n_attempted_connections, n_successful_connections; time_t creation_time; unsigned long int total_time_connected; - /* If we can't connect to the peer, it could be for any number of reasons. - * Usually, one would assume it is because the peer is not running or - * because the network is partitioned. But it could also be because the - * network topology has changed, in which case the upper layer will need to - * reassess it (in particular, obtain a new IP address via DHCP and find - * the new location of the controller). We set this flag when we suspect - * that this could be the case. */ - bool questionable_connectivity; - time_t last_questioned; - /* Throughout this file, "probe" is shorthand for "inactivity probe". * When nothing has been received from the peer for a while, we send out * an echo request as an inactivity probe packet. We should receive back * a response. */ int probe_interval; /* Secs of inactivity before sending probe. */ + /* When we create a vconn we obtain these values, to save them past the end + * of the vconn's lifetime. Otherwise, in-band control will only allow + * traffic when a vconn is actually open, but it is nice to allow ARP to + * complete even between connection attempts, and it is also polite to + * allow traffic from other switches to go through to the controller + * whether or not we are connected. + * + * We don't cache the local port, because that changes from one connection + * attempt to the next. */ + ovs_be32 local_ip, remote_ip; + ovs_be16 remote_port; + uint8_t dscp; + /* Messages sent or received are copied to the monitor connections. 
*/ #define MAX_MONITORS 8 struct vconn *monitors[8]; @@ -121,32 +133,17 @@ static unsigned int elapsed_in_this_state(const struct rconn *); static unsigned int timeout(const struct rconn *); static bool timed_out(const struct rconn *); static void state_transition(struct rconn *, enum state); +static void rconn_set_target__(struct rconn *, + const char *target, const char *name); static int try_send(struct rconn *); -static int reconnect(struct rconn *); +static void reconnect(struct rconn *); +static void report_error(struct rconn *, int error); static void disconnect(struct rconn *, int error); static void flush_queue(struct rconn *); -static void question_connectivity(struct rconn *); static void copy_to_monitor(struct rconn *, const struct ofpbuf *); static bool is_connected_state(enum state); static bool is_admitted_msg(const struct ofpbuf *); - -/* Creates a new rconn, connects it (reliably) to 'name', and returns it. */ -struct rconn * -rconn_new(const char *name, int inactivity_probe_interval, int max_backoff) -{ - struct rconn *rc = rconn_create(inactivity_probe_interval, max_backoff); - rconn_connect(rc, name); - return rc; -} - -/* Creates a new rconn, connects it (unreliably) to 'vconn', and returns it. */ -struct rconn * -rconn_new_from_vconn(const char *name, struct vconn *vconn) -{ - struct rconn *rc = rconn_create(60, 0); - rconn_connect_unreliably(rc, name, vconn); - return rc; -} +static bool rconn_logging_connection_attempts__(const struct rconn *); /* Creates and returns a new rconn. * @@ -159,26 +156,31 @@ rconn_new_from_vconn(const char *name, struct vconn *vconn) * 'max_backoff' is the maximum number of seconds between attempts to connect * to the peer. The actual interval starts at 1 second and doubles on each * failure until it reaches 'max_backoff'. If 0 is specified, the default of - * 60 seconds is used. */ + * 8 seconds is used. + * + * The new rconn is initially unconnected. 
Use rconn_connect() or + * rconn_connect_unreliably() to connect it. */ struct rconn * -rconn_create(int probe_interval, int max_backoff) +rconn_create(int probe_interval, int max_backoff, uint8_t dscp) { - struct rconn *rc = xcalloc(1, sizeof *rc); + struct rconn *rc = xzalloc(sizeof *rc); rc->state = S_VOID; rc->state_entered = time_now(); rc->vconn = NULL; rc->name = xstrdup("void"); + rc->target = xstrdup("void"); rc->reliable = false; - queue_init(&rc->txq); + list_init(&rc->txq); rc->backoff = 0; - rc->max_backoff = max_backoff ? max_backoff : 60; + rc->max_backoff = max_backoff ? max_backoff : 8; rc->backoff_deadline = TIME_MIN; rc->last_received = time_now(); - rc->last_connected = time_now(); + rc->last_connected = TIME_MIN; + rc->last_disconnected = TIME_MIN; rc->seqno = 0; rc->packets_sent = 0; @@ -192,10 +194,8 @@ rconn_create(int probe_interval, int max_backoff) rc->creation_time = time_now(); rc->total_time_connected = 0; - rc->questionable_connectivity = false; - rc->last_questioned = time_now(); - rconn_set_probe_interval(rc, probe_interval); + rconn_set_dscp(rc, dscp); rc->n_monitors = 0; @@ -220,6 +220,12 @@ rconn_get_max_backoff(const struct rconn *rc) return rc->max_backoff; } +void +rconn_set_dscp(struct rconn *rc, uint8_t dscp) +{ + rc->dscp = dscp; +} + void rconn_set_probe_interval(struct rconn *rc, int probe_interval) { @@ -232,24 +238,37 @@ rconn_get_probe_interval(const struct rconn *rc) return rc->probe_interval; } -int -rconn_connect(struct rconn *rc, const char *name) +/* Drops any existing connection on 'rc', then sets up 'rc' to connect to + * 'target' and reconnect as needed. 'target' should be a remote OpenFlow + * target in a form acceptable to vconn_open(). + * + * If 'name' is nonnull, then it is used in log messages in place of 'target'. + * It should presumably give more information to a human reader than 'target', + * but it need not be acceptable to vconn_open(). 
*/ +void +rconn_connect(struct rconn *rc, const char *target, const char *name) { rconn_disconnect(rc); - free(rc->name); - rc->name = xstrdup(name); + rconn_set_target__(rc, target, name); rc->reliable = true; - return reconnect(rc); + reconnect(rc); } +/* Drops any existing connection on 'rc', then configures 'rc' to use + * 'vconn'. If the connection on 'vconn' drops, 'rc' will not reconnect on its + * own. + * + * By default, the target obtained from vconn_get_name(vconn) is used in log + * messages. If 'name' is nonnull, then it is used instead. It should + * presumably give more information to a human reader than the target, but it + * need not be acceptable to vconn_open(). */ void rconn_connect_unreliably(struct rconn *rc, - const char *name, struct vconn *vconn) + struct vconn *vconn, const char *name) { assert(vconn != NULL); rconn_disconnect(rc); - free(rc->name); - rc->name = xstrdup(name); + rconn_set_target__(rc, vconn_get_name(vconn), name); rc->reliable = false; rc->vconn = vconn; rc->last_connected = time_now(); @@ -261,6 +280,7 @@ void rconn_reconnect(struct rconn *rc) { if (rc->state & (S_ACTIVE | S_IDLE)) { + VLOG_INFO("%s: disconnecting", rc->name); disconnect(rc, 0); } } @@ -273,8 +293,7 @@ rconn_disconnect(struct rconn *rc) vconn_close(rc->vconn); rc->vconn = NULL; } - free(rc->name); - rc->name = xstrdup("void"); + rconn_set_target__(rc, "void", NULL); rc->reliable = false; rc->backoff = 0; @@ -292,9 +311,10 @@ rconn_destroy(struct rconn *rc) size_t i; free(rc->name); + free(rc->target); vconn_close(rc->vconn); flush_queue(rc); - queue_destroy(&rc->txq); + ofpbuf_list_delete(&rc->txq); for (i = 0; i < rc->n_monitors; i++) { vconn_close(rc->monitors[i]); } @@ -303,34 +323,38 @@ rconn_destroy(struct rconn *rc) } static unsigned int -timeout_VOID(const struct rconn *rc UNUSED) +timeout_VOID(const struct rconn *rc OVS_UNUSED) { return UINT_MAX; } static void -run_VOID(struct rconn *rc UNUSED) +run_VOID(struct rconn *rc OVS_UNUSED) { /* Nothing to
do. */ } -static int +static void reconnect(struct rconn *rc) { int retval; - VLOG_INFO("%s: connecting...", rc->name); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connecting...", rc->name); + } rc->n_attempted_connections++; - retval = vconn_open(rc->name, OFP_VERSION, &rc->vconn); + retval = vconn_open(rc->target, OFP10_VERSION, &rc->vconn, rc->dscp); if (!retval) { + rc->remote_ip = vconn_get_remote_ip(rc->vconn); + rc->local_ip = vconn_get_local_ip(rc->vconn); + rc->remote_port = vconn_get_remote_port(rc->vconn); rc->backoff_deadline = time_now() + rc->backoff; state_transition(rc, S_CONNECTING); } else { VLOG_WARN("%s: connection failed (%s)", rc->name, strerror(retval)); rc->backoff_deadline = TIME_MAX; /* Prevent resetting backoff. */ - disconnect(rc, 0); + disconnect(rc, retval); } - return retval; } static unsigned int @@ -363,28 +387,33 @@ run_CONNECTING(struct rconn *rc) state_transition(rc, S_ACTIVE); rc->last_connected = rc->state_entered; } else if (retval != EAGAIN) { - VLOG_INFO("%s: connection failed (%s)", rc->name, strerror(retval)); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connection failed (%s)", + rc->name, strerror(retval)); + } disconnect(rc, retval); } else if (timed_out(rc)) { - VLOG_INFO("%s: connection timed out", rc->name); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connection timed out", rc->name); + } rc->backoff_deadline = TIME_MAX; /* Prevent resetting backoff. 
*/ - disconnect(rc, 0); + disconnect(rc, ETIMEDOUT); } } static void do_tx_work(struct rconn *rc) { - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { return; } - while (rc->txq.n > 0) { + while (!list_is_empty(&rc->txq)) { int error = try_send(rc); if (error) { break; } } - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { poll_immediate_wake(); } } @@ -429,11 +458,10 @@ static void run_IDLE(struct rconn *rc) { if (timed_out(rc)) { - question_connectivity(rc); VLOG_ERR("%s: no response to inactivity probe after %u " "seconds, disconnecting", rc->name, elapsed_in_this_state(rc)); - disconnect(rc, 0); + disconnect(rc, ETIMEDOUT); } else { do_tx_work(rc); } @@ -446,6 +474,15 @@ void rconn_run(struct rconn *rc) { int old_state; + size_t i; + + if (rc->vconn) { + vconn_run(rc->vconn); + } + for (i = 0; i < rc->n_monitors; i++) { + vconn_run(rc->monitors[i]); + } + do { old_state = rc->state; switch (rc->state) { @@ -463,15 +500,23 @@ rconn_run(struct rconn *rc) void rconn_run_wait(struct rconn *rc) { - unsigned int timeo = timeout(rc); - if (timeo != UINT_MAX) { - unsigned int expires = sat_add(rc->state_entered, timeo); - unsigned int remaining = sat_sub(expires, time_now()); - poll_timer_wait(sat_mul(remaining, 1000)); + unsigned int timeo; + size_t i; + + if (rc->vconn) { + vconn_run_wait(rc->vconn); + if ((rc->state & (S_ACTIVE | S_IDLE)) && !list_is_empty(&rc->txq)) { + vconn_wait(rc->vconn, WAIT_SEND); + } + } + for (i = 0; i < rc->n_monitors; i++) { + vconn_run_wait(rc->monitors[i]); } - if ((rc->state & (S_ACTIVE | S_IDLE)) && rc->txq.n) { - vconn_wait(rc->vconn, WAIT_SEND); + timeo = timeout(rc); + if (timeo != UINT_MAX) { + long long int expires = sat_add(rc->state_entered, timeo); + poll_timer_wait_until(expires * 1000); } } @@ -486,7 +531,7 @@ rconn_recv(struct rconn *rc) int error = vconn_recv(rc->vconn, &buffer); if (!error) { copy_to_monitor(rc, buffer); - if (is_admitted_msg(buffer) + if (rc->probably_admitted || is_admitted_msg(buffer) || time_now() 
- rc->last_connected >= 30) { rc->probably_admitted = true; rc->last_admitted = time_now(); @@ -498,6 +543,7 @@ rconn_recv(struct rconn *rc) } return buffer; } else if (error != EAGAIN) { + report_error(rc, error); disconnect(rc, error); } } @@ -514,9 +560,8 @@ rconn_recv_wait(struct rconn *rc) } } -/* Sends 'b' on 'rc'. Returns 0 if successful (in which case 'b' is - * destroyed), or ENOTCONN if 'rc' is not currently connected (in which case - * the caller retains ownership of 'b'). +/* Sends 'b' on 'rc'. Returns 0 if successful, or ENOTCONN if 'rc' is not + * currently connected. Takes ownership of 'b'. * * If 'counter' is non-null, then 'counter' will be incremented while the * packet is in flight, then decremented when it has been sent (or discarded @@ -534,21 +579,22 @@ rconn_send(struct rconn *rc, struct ofpbuf *b, if (rconn_is_connected(rc)) { COVERAGE_INC(rconn_queued); copy_to_monitor(rc, b); - b->private = counter; + b->private_p = counter; if (counter) { rconn_packet_counter_inc(counter); } - queue_push_tail(&rc->txq, b); + list_push_back(&rc->txq, &b->list_node); /* If the queue was empty before we added 'b', try to send some * packets. (But if the queue had packets in it, it's because the * vconn is backlogged and there's no point in stuffing more into it * now. We'll get back to that in rconn_run().) */ - if (rc->txq.n == 1) { + if (rc->txq.next == &b->list_node) { try_send(rc); } return 0; } else { + ofpbuf_delete(b); return ENOTCONN; } } @@ -573,7 +619,6 @@ rconn_send_with_limit(struct rconn *rc, struct ofpbuf *b, retval = counter->n >= queue_limit ? EAGAIN : rconn_send(rc, b, counter); if (retval) { COVERAGE_INC(rconn_overflow); - ofpbuf_delete(b); } return retval; } @@ -602,13 +647,31 @@ rconn_add_monitor(struct rconn *rc, struct vconn *vconn) } } -/* Returns 'rc''s name (the 'name' argument passed to rconn_new()). */ +/* Returns 'rc''s name. This is a name for human consumption, appropriate for + * use in log messages. 
It is not necessarily a name that may be passed + * directly to, e.g., vconn_open(). */ const char * rconn_get_name(const struct rconn *rc) { return rc->name; } +/* Sets 'rc''s name to 'new_name'. */ +void +rconn_set_name(struct rconn *rc, const char *new_name) +{ + free(rc->name); + rc->name = xstrdup(new_name); +} + +/* Returns 'rc''s target. This is intended to be a string that may be passed + * directly to, e.g., vconn_open(). */ +const char * +rconn_get_target(const struct rconn *rc) +{ + return rc->target; +} + /* Returns true if 'rconn' is connected or in the process of reconnecting, * false if 'rconn' is disconnected and will not reconnect on its own. */ bool @@ -624,64 +687,63 @@ rconn_is_connected(const struct rconn *rconn) return is_connected_state(rconn->state); } -/* Returns 0 if 'rconn' is connected. Otherwise, if 'rconn' is in a "failure - * mode" (that is, it is not connected), returns the number of seconds that it - * has been in failure mode, ignoring any times that it connected but the - * controller's admission control policy caused it to be quickly - * disconnected. */ +/* Returns true if 'rconn' is connected and thought to have been accepted by + * the peer's admission-control policy. */ +bool +rconn_is_admitted(const struct rconn *rconn) +{ + return (rconn_is_connected(rconn) + && rconn->last_admitted >= rconn->last_connected); +} + +/* Returns 0 if 'rconn' is currently connected and considered to have been + * accepted by the peer's admission-control policy, otherwise the number of + * seconds since 'rconn' was last in such a state. */ int rconn_failure_duration(const struct rconn *rconn) { - return rconn_is_connected(rconn) ? 0 : time_now() - rconn->last_admitted; + return rconn_is_admitted(rconn) ? 0 : time_now() - rconn->last_admitted; } -/* Returns the IP address of the peer, or 0 if the peer is not connected over - * an IP-based protocol or if its IP address is not known. 
*/ -uint32_t -rconn_get_remote_ip(const struct rconn *rconn) +/* Returns the IP address of the peer, or 0 if the peer's IP address is not + * known. */ +ovs_be32 +rconn_get_remote_ip(const struct rconn *rconn) { - return rconn->vconn ? vconn_get_remote_ip(rconn->vconn) : 0; + return rconn->remote_ip; } -/* Returns the transport port of the peer, or 0 if the peer does not - * contain a port or if the port is not known. */ -uint16_t -rconn_get_remote_port(const struct rconn *rconn) +/* Returns the transport port of the peer, or 0 if the peer's port is not + * known. */ +ovs_be16 +rconn_get_remote_port(const struct rconn *rconn) { - return rconn->vconn ? vconn_get_remote_port(rconn->vconn) : 0; + return rconn->remote_port; } /* Returns the IP address used to connect to the peer, or 0 if the - * connection is not an IP-based protocol or if its IP address is not + * connection is not an IP-based protocol or if its IP address is not * known. */ -uint32_t -rconn_get_local_ip(const struct rconn *rconn) +ovs_be32 +rconn_get_local_ip(const struct rconn *rconn) { - return rconn->vconn ? vconn_get_local_ip(rconn->vconn) : 0; + return rconn->local_ip; } /* Returns the transport port used to connect to the peer, or 0 if the * connection does not contain a port or if the port is not known. */ -uint16_t -rconn_get_local_port(const struct rconn *rconn) +ovs_be16 +rconn_get_local_port(const struct rconn *rconn) { return rconn->vconn ? vconn_get_local_port(rconn->vconn) : 0; } -/* If 'rconn' can't connect to the peer, it could be for any number of reasons. - * Usually, one would assume it is because the peer is not running or because - * the network is partitioned. But it could also be because the network - * topology has changed, in which case the upper layer will need to reassess it - * (in particular, obtain a new IP address via DHCP and find the new location - * of the controller). When this appears that this might be the case, this - * function returns true. 
It also clears the questionability flag and prevents - * it from being set again for some time. */ -bool -rconn_is_connectivity_questionable(struct rconn *rconn) +/* Returns the OpenFlow version negotiated with the peer, or -1 if there is + * currently no connection or if version negotiation is not yet complete. */ +int +rconn_get_version(const struct rconn *rconn) { - bool questionable = rconn->questionable_connectivity; - rconn->questionable_connectivity = false; - return questionable; + return rconn->vconn ? vconn_get_version(rconn->vconn) : -1; } /* Returns the total number of packets successfully received by the underlying @@ -716,13 +778,21 @@ rconn_get_successful_connections(const struct rconn *rc) } /* Returns the time at which the last successful connection was made by - * 'rc'. */ + * 'rc'. Returns TIME_MIN if never connected. */ time_t rconn_get_last_connection(const struct rconn *rc) { return rc->last_connected; } +/* Returns the time at which 'rc' was last disconnected. Returns TIME_MIN + * if never disconnected. */ +time_t +rconn_get_last_disconnect(const struct rconn *rc) +{ + return rc->last_disconnected; +} + /* Returns the time at which the last OpenFlow message was received by 'rc'. * If no packets have been received on 'rc', returns the time at which 'rc' * was created. */ @@ -769,6 +839,22 @@ rconn_get_connection_seqno(const struct rconn *rc) { return rc->seqno; } + +/* Returns a value that explains why 'rc' last disconnected: + * + * - 0 means that the last disconnection was caused by a call to + * rconn_disconnect(), or that 'rc' is new and has not yet completed its + * initial connection or connection attempt. + * + * - EOF means that the connection was closed in the normal way by the peer. + * + * - A positive integer is an errno value that represents the error. 
+ */ +int +rconn_get_last_error(const struct rconn *rc) +{ + return rc->last_error; +} struct rconn_packet_counter * rconn_packet_counter_create(void) @@ -805,17 +891,42 @@ rconn_packet_counter_dec(struct rconn_packet_counter *c) } } +/* Set rc->target and rc->name to 'target' and 'name', respectively. If 'name' + * is null, 'target' is used. + * + * Also, clear out the cached IP address and port information, since changing + * the target also likely changes these values. */ +static void +rconn_set_target__(struct rconn *rc, const char *target, const char *name) +{ + free(rc->name); + rc->name = xstrdup(name ? name : target); + free(rc->target); + rc->target = xstrdup(target); + rc->local_ip = 0; + rc->remote_ip = 0; + rc->remote_port = 0; +} + /* Tries to send a packet from 'rc''s send buffer. Returns 0 if successful, * otherwise a positive errno value. */ static int try_send(struct rconn *rc) { - int retval = 0; - struct ofpbuf *next = rc->txq.head->next; - struct rconn_packet_counter *counter = rc->txq.head->private; - retval = vconn_send(rc->vconn, rc->txq.head); + struct ofpbuf *msg = ofpbuf_from_list(rc->txq.next); + struct rconn_packet_counter *counter = msg->private_p; + int retval; + + /* Eagerly remove 'msg' from the txq. We can't remove it from the list + * after sending, if sending is successful, because it is then owned by the + * vconn, which might have freed it already. */ + list_remove(&msg->list_node); + + retval = vconn_send(rc->vconn, msg); if (retval) { + list_push_front(&rc->txq, &msg->list_node); if (retval != EAGAIN) { + report_error(rc, retval); disconnect(rc, retval); } return retval; @@ -825,30 +936,45 @@ try_send(struct rconn *rc) if (counter) { rconn_packet_counter_dec(counter); } - queue_advance_head(&rc->txq, next); return 0; } -/* Disconnects 'rc'. 'error' is used only for logging purposes. If it is - * nonzero, then it should be EOF to indicate the connection was closed by the - * peer in a normal fashion or a positive errno value. 
*/ +/* Reports that 'error' caused 'rc' to disconnect. 'error' may be a positive + * errno value, or it may be EOF to indicate that the connection was closed + * normally. */ +static void +report_error(struct rconn *rc, int error) +{ + if (error == EOF) { + /* If 'rc' isn't reliable, then we don't really expect this connection + * to last forever anyway (probably it's a connection that we received + * via accept()), so use DBG level to avoid cluttering the logs. */ + enum vlog_level level = rc->reliable ? VLL_INFO : VLL_DBG; + VLOG(level, "%s: connection closed by peer", rc->name); + } else { + VLOG_WARN("%s: connection dropped (%s)", rc->name, strerror(error)); + } +} + +/* Disconnects 'rc' and records 'error' as the error that caused 'rc''s last + * disconnection: + * + * - 0 means that this disconnection is due to a request by 'rc''s client, + * not due to any kind of network error. + * + * - EOF means that the connection was closed in the normal way by the peer. + * + * - A positive integer is an errno value that represents the error. 
+ */ static void disconnect(struct rconn *rc, int error) { + rc->last_error = error; if (rc->reliable) { time_t now = time_now(); if (rc->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) { - if (error > 0) { - VLOG_WARN("%s: connection dropped (%s)", - rc->name, strerror(error)); - } else if (error == EOF) { - if (rc->reliable) { - VLOG_INFO("%s: connection closed by peer", rc->name); - } - } else { - VLOG_INFO("%s: connection dropped", rc->name); - } + rc->last_disconnected = now; vconn_close(rc->vconn); rc->vconn = NULL; flush_queue(rc); @@ -856,17 +982,22 @@ disconnect(struct rconn *rc, int error) if (now >= rc->backoff_deadline) { rc->backoff = 1; - } else { - rc->backoff = MIN(rc->max_backoff, MAX(1, 2 * rc->backoff)); - VLOG_INFO("%s: waiting %d seconds before reconnect\n", + } else if (rc->backoff < rc->max_backoff / 2) { + rc->backoff = MAX(1, 2 * rc->backoff); + VLOG_INFO("%s: waiting %d seconds before reconnect", rc->name, rc->backoff); + } else { + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: continuing to retry connections in the " + "background but suppressing further logging", + rc->name); + } + rc->backoff = rc->max_backoff; } rc->backoff_deadline = now + rc->backoff; state_transition(rc, S_BACKOFF); - if (now - rc->last_connected > 60) { - question_connectivity(rc); - } } else { + rc->last_disconnected = time_now(); rconn_disconnect(rc); } } @@ -876,12 +1007,12 @@ disconnect(struct rconn *rc, int error) static void flush_queue(struct rconn *rc) { - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { return; } - while (rc->txq.n > 0) { - struct ofpbuf *b = queue_pop_head(&rc->txq); - struct rconn_packet_counter *counter = b->private; + while (!list_is_empty(&rc->txq)) { + struct ofpbuf *b = ofpbuf_from_list(list_pop_front(&rc->txq)); + struct rconn_packet_counter *counter = b->private_p; if (counter) { rconn_packet_counter_dec(counter); } @@ -930,16 +1061,6 @@ state_transition(struct rconn *rc, enum state state) rc->state_entered = 
time_now(); } -static void -question_connectivity(struct rconn *rc) -{ - time_t now = time_now(); - if (now - rc->last_questioned > 60) { - rc->questionable_connectivity = true; - rc->last_questioned = now; - } -} - static void copy_to_monitor(struct rconn *rc, const struct ofpbuf *b) { @@ -969,7 +1090,7 @@ copy_to_monitor(struct rconn *rc, const struct ofpbuf *b) } static bool -is_connected_state(enum state state) +is_connected_state(enum state state) { return (state & (S_ACTIVE | S_IDLE)) != 0; } @@ -991,3 +1112,12 @@ is_admitted_msg(const struct ofpbuf *b) (1u << OFPT_GET_CONFIG_REPLY) | (1u << OFPT_SET_CONFIG))); } + +/* Returns true if 'rc' is currently logging information about connection + * attempts, false if logging should be suppressed because 'rc' hasn't + * successfully connected in too long. */ +static bool +rconn_logging_connection_attempts__(const struct rconn *rc) +{ + return rc->backoff < rc->max_backoff; +}