/*
- * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <config.h>
#include "reconnect.h"
-#include <assert.h>
#include <stdlib.h>
#include "poll-loop.h"
-
-#define THIS_MODULE VLM_reconnect
#include "vlog.h"
+VLOG_DEFINE_THIS_MODULE(reconnect);
+
/* X-macro table of FSM states: each STATE(NAME, VALUE) entry pairs a state
 * name with its own bit, so that a state can be tested against a mask of
 * several states, e.g. fsm->state & (S_ACTIVE | S_IDLE). */
#define STATES \
STATE(VOID, 1 << 0) \
STATE(BACKOFF, 1 << 1) \
- STATE(START_CONNECT, 1 << 2) \
- STATE(CONNECT_IN_PROGRESS, 1 << 3) \
+ STATE(CONNECTING, 1 << 3) \
STATE(ACTIVE, 1 << 4) \
STATE(IDLE, 1 << 5) \
- STATE(RECONNECT, 1 << 6)
+ STATE(RECONNECT, 1 << 6) \
+ STATE(LISTENING, 1 << 7)
enum state {
#define STATE(NAME, VALUE) S_##NAME = VALUE,
STATES
int min_backoff;
int max_backoff;
int probe_interval;
+ bool passive;
+ enum vlog_level info; /* Used for informational messages. */
/* State. */
enum state state;
long long int state_entered;
int backoff;
- long long int last_received;
+ long long int last_activity;
long long int last_connected;
+ long long int last_disconnected;
unsigned int max_tries;
/* These values are simply for statistics reporting, not otherwise used
struct reconnect *fsm = xzalloc(sizeof *fsm);
fsm->name = xstrdup("void");
- fsm->min_backoff = 1000;
- fsm->max_backoff = 8000;
- fsm->probe_interval = 5000;
+ fsm->min_backoff = RECONNECT_DEFAULT_MIN_BACKOFF;
+ fsm->max_backoff = RECONNECT_DEFAULT_MAX_BACKOFF;
+ fsm->probe_interval = RECONNECT_DEFAULT_PROBE_INTERVAL;
+ fsm->passive = false;
+ fsm->info = VLL_INFO;
fsm->state = S_VOID;
fsm->state_entered = now;
fsm->backoff = 0;
- fsm->last_received = now;
- fsm->last_connected = now;
+ fsm->last_activity = now;
+ fsm->last_connected = LLONG_MAX;
+ fsm->last_disconnected = LLONG_MAX;
fsm->max_tries = UINT_MAX;
fsm->creation_time = now;
}
}
+/* If 'quiet' is true, 'fsm' will log informational messages at level VLL_DBG,
+ * by default keeping them out of log files. This is appropriate if the
+ * connection is one that is expected to be short-lived, so that the log
+ * messages are merely distracting.
+ *
+ * If 'quiet' is false, 'fsm' logs informational messages at level VLL_INFO.
+ * This is the default.
+ *
+ * The selected level is stored in 'fsm->info', which this file passes to
+ * VLOG() for messages such as "connected" and "connection closed by peer".
+ *
+ * This setting has no effect on the log level of debugging, warning, or error
+ * messages. */
+void
+reconnect_set_quiet(struct reconnect *fsm, bool quiet)
+{
+ fsm->info = quiet ? VLL_DBG : VLL_INFO;
+}
+
/* Returns 'fsm''s name. */
const char *
reconnect_get_name(const struct reconnect *fsm)
}
/* Return the minimum number of milliseconds to back off between consecutive
- * connection attempts. The default is 1000 ms. */
+ * connection attempts. The default is RECONNECT_DEFAULT_MIN_BACKOFF. */
int
reconnect_get_min_backoff(const struct reconnect *fsm)
{
}
/* Return the maximum number of milliseconds to back off between consecutive
- * connection attempts. The default is 8000 ms. */
+ * connection attempts. The default is RECONNECT_DEFAULT_MAX_BACKOFF. */
int
reconnect_get_max_backoff(const struct reconnect *fsm)
{
/* Returns the "probe interval" for 'fsm' in milliseconds. If this is zero, it
* disables the connection keepalive feature. If it is nonzero, then if the
- * interval passes while 'fsm' is connected and without reconnect_received()
+ * interval passes while 'fsm' is connected and without reconnect_activity()
* being called for 'fsm', reconnect_run() returns RECONNECT_PROBE. If the
- * interval passes again without reconnect_received() being called,
+ * interval passes again without reconnect_activity() being called,
* reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. */
int
reconnect_get_probe_interval(const struct reconnect *fsm)
* attempts.
*
* 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than
- * or equal to 'min_backoff'. */
+ * or equal to 'min_backoff'.
+ *
+ * Pass 0 for 'min_backoff' or 'max_backoff' or both to use the defaults. */
void
reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff)
{
fsm->min_backoff = MAX(min_backoff, 1000);
- fsm->max_backoff = max_backoff ? MAX(max_backoff, 1000) : 8000;
+ fsm->max_backoff = (max_backoff
+ ? MAX(max_backoff, 1000)
+ : RECONNECT_DEFAULT_MAX_BACKOFF);
if (fsm->min_backoff > fsm->max_backoff) {
fsm->max_backoff = fsm->min_backoff;
}
/* Sets the "probe interval" for 'fsm' to 'probe_interval', in milliseconds.
* If this is zero, it disables the connection keepalive feature. If it is
* nonzero, then if the interval passes while 'fsm' is connected and without
- * reconnect_received() being called for 'fsm', reconnect_run() returns
- * RECONNECT_PROBE. If the interval passes again without reconnect_received()
+ * reconnect_activity() being called for 'fsm', reconnect_run() returns
+ * RECONNECT_PROBE. If the interval passes again without reconnect_activity()
* being called, reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'.
*
* If 'probe_interval' is nonzero, then it will be forced to a value of at
fsm->probe_interval = probe_interval ? MAX(1000, probe_interval) : 0;
}
+/* Returns true if 'fsm' is in passive mode, false if 'fsm' is in active mode
+ * (the default).
+ *
+ * This simply reports the 'passive' flag, which reconnect_set_passive()
+ * updates. */
+bool
+reconnect_is_passive(const struct reconnect *fsm)
+{
+ return fsm->passive;
+}
+
+/* Configures 'fsm' for active or passive mode. In active mode (the default),
+ * the FSM is attempting to connect to a remote host. In passive mode, the FSM
+ * is listening for connections from a remote host.
+ *
+ * Nothing happens if 'fsm' is already in the requested mode. Otherwise the
+ * new mode is recorded and, in the following cases, the FSM moves to
+ * S_BACKOFF at time 'now' with its backoff reset to 0:
+ *
+ *    - Switching to passive while in S_CONNECTING or S_RECONNECT.
+ *
+ *    - Switching to active while in S_LISTENING, provided that
+ *      reconnect_may_retry() returns true.
+ *
+ * In any other state only the mode flag changes. */
+void
+reconnect_set_passive(struct reconnect *fsm, bool passive, long long int now)
+{
+ if (fsm->passive != passive) {
+ fsm->passive = passive;
+
+ if (passive
+ ? fsm->state & (S_CONNECTING | S_RECONNECT)
+ : fsm->state == S_LISTENING && reconnect_may_retry(fsm)) {
+ reconnect_transition__(fsm, now, S_BACKOFF);
+ fsm->backoff = 0;
+ }
+ }
+}
+
/* Returns true if 'fsm' has been enabled with reconnect_enable(). Calling
* another function that indicates a change in connection state, such as
* reconnect_disconnected() or reconnect_force_reconnect(), will also enable
void
reconnect_force_reconnect(struct reconnect *fsm, long long int now)
{
/* Only S_CONNECTING, S_ACTIVE and S_IDLE are moved to S_RECONNECT (entered
 * at time 'now'); in every other state this call leaves 'fsm' unchanged. */
- if (fsm->state & (S_START_CONNECT | S_CONNECT_IN_PROGRESS
- | S_ACTIVE | S_IDLE)) {
+ if (fsm->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) {
reconnect_transition__(fsm, now, S_RECONNECT);
}
}
VLOG_WARN("%s: connection dropped (%s)",
fsm->name, strerror(error));
} else if (error == EOF) {
- VLOG_INFO("%s: connection closed by peer", fsm->name);
+ VLOG(fsm->info, "%s: connection closed by peer", fsm->name);
} else {
- VLOG_INFO("%s: connection dropped", fsm->name);
+ VLOG(fsm->info, "%s: connection dropped", fsm->name);
}
- } else {
+ } else if (fsm->state == S_LISTENING) {
if (error > 0) {
- VLOG_WARN("%s: connection attempt failed (%s)",
+ VLOG_WARN("%s: error listening for connections (%s)",
fsm->name, strerror(error));
} else {
- VLOG_INFO("%s: connection attempt timed out", fsm->name);
+ VLOG(fsm->info, "%s: error listening for connections",
+ fsm->name);
+ }
+ } else {
+ const char *type = fsm->passive ? "listen" : "connection";
+ if (error > 0) {
+ VLOG_WARN("%s: %s attempt failed (%s)",
+ fsm->name, type, strerror(error));
+ } else {
+ VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type);
}
}
+ if (fsm->state & (S_ACTIVE | S_IDLE)) {
+ fsm->last_disconnected = now;
+ }
/* Back off. */
if (fsm->state & (S_ACTIVE | S_IDLE)
- && fsm->last_received - fsm->last_connected >= fsm->backoff) {
- fsm->backoff = fsm->min_backoff;
+ && (fsm->last_activity - fsm->last_connected >= fsm->backoff
+ || fsm->passive)) {
+ fsm->backoff = fsm->passive ? 0 : fsm->min_backoff;
} else {
if (fsm->backoff < fsm->min_backoff) {
fsm->backoff = fsm->min_backoff;
} else {
fsm->backoff *= 2;
}
- VLOG_INFO("%s: waiting %.3g seconds before reconnect\n",
- fsm->name, fsm->backoff / 1000.0);
+ if (fsm->passive) {
+ VLOG(fsm->info, "%s: waiting %.3g seconds before trying to "
+ "listen again", fsm->name, fsm->backoff / 1000.0);
+ } else {
+ VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect",
+ fsm->name, fsm->backoff / 1000.0);
+ }
}
reconnect_transition__(fsm, now,
}
}
-/* Tell 'fsm' that a connection attempt is in progress.
+/* Tell 'fsm' that a connection or listening attempt is in progress.
*
- * The FSM will start a timer, after which the connection attempt will be
- * aborted (by returning RECONNECT_DISCONNECT from reconect_run()). */
+ * The FSM will start a timer, after which the connection or listening attempt
+ * will be aborted (by returning RECONNECT_DISCONNECT from
+ * reconnect_run()). */
void
reconnect_connecting(struct reconnect *fsm, long long int now)
{
/* A call made while already in S_CONNECTING is ignored. Otherwise a message
 * is logged at the level in 'fsm->info' ("listening..." in passive mode,
 * "connecting..." in active mode) and S_CONNECTING is entered at 'now'. */
- if (fsm->state != S_CONNECT_IN_PROGRESS) {
- VLOG_INFO("%s: connecting...", fsm->name);
- reconnect_transition__(fsm, now, S_CONNECT_IN_PROGRESS);
+ if (fsm->state != S_CONNECTING) {
+ if (fsm->passive) {
+ VLOG(fsm->info, "%s: listening...", fsm->name);
+ } else {
+ VLOG(fsm->info, "%s: connecting...", fsm->name);
+ }
+ reconnect_transition__(fsm, now, S_CONNECTING);
+ }
+}
+
+/* Tell 'fsm' that the client is listening for connection attempts. This state
+ * lasts indefinitely until the client reports some change.
+ *
+ * The natural progression from this state is for the client to report that a
+ * connection has been accepted or is in the process of being accepted, by
+ * calling reconnect_connecting() or reconnect_connected().
+ *
+ * The client may also report that listening failed (e.g. accept() returned an
+ * unexpected error such as ENOMEM) by calling reconnect_listen_error(), in
+ * which case the FSM will back off and eventually return RECONNECT_CONNECT
+ * from reconnect_run() to tell the client to try listening again.
+ *
+ * If 'fsm' is already in S_LISTENING this call does nothing. Otherwise it
+ * logs "listening..." at the level in 'fsm->info' and enters S_LISTENING at
+ * time 'now'. */
+void
+reconnect_listening(struct reconnect *fsm, long long int now)
+{
+ if (fsm->state != S_LISTENING) {
+ VLOG(fsm->info, "%s: listening...", fsm->name);
+ reconnect_transition__(fsm, now, S_LISTENING);
+ }
+}
+
+/* Tell 'fsm' that the client's attempt to accept a connection failed
+ * (e.g. accept() returned an unexpected error such as ENOMEM).
+ *
+ * If the FSM is currently listening (reconnect_listening() was called), it
+ * will back off and eventually return RECONNECT_CONNECT from reconnect_run()
+ * to tell the client to try listening again. If there is an active
+ * connection, this will be delayed until that connection drops.
+ *
+ * The call itself changes 'fsm' only when its state is S_LISTENING, in which
+ * case 'now' and 'error' are forwarded unchanged to
+ * reconnect_disconnected(). */
+void
+reconnect_listen_error(struct reconnect *fsm, long long int now, int error)
+{
+ if (fsm->state == S_LISTENING) {
+ reconnect_disconnected(fsm, now, error);
}
}
/* Tell 'fsm' that the connection was successful.
*
* The FSM will start the probe interval timer, which is reset by
- * reconnect_received(). If the timer expires, a probe will be sent (by
+ * reconnect_activity(). If the timer expires, a probe will be sent (by
* returning RECONNECT_PROBE from reconnect_run()). If the timer expires
* again without being reset, the connection will be aborted (by returning
* RECONNECT_DISCONNECT from reconnect_run()). */
if (!is_connected_state(fsm->state)) {
reconnect_connecting(fsm, now);
- VLOG_INFO("%s: connected", fsm->name);
+ VLOG(fsm->info, "%s: connected", fsm->name);
reconnect_transition__(fsm, now, S_ACTIVE);
fsm->last_connected = now;
}
reconnect_disconnected(fsm, now, error);
}
-/* Tell 'fsm' that some data was received. This resets the probe interval
- * timer, so that the connection is known not to be idle. */
+/* Tell 'fsm' that some activity has occurred on the connection. This resets
+ * the probe interval timer, so that the connection is known not to be idle.
+ *
+ * Concretely, 'fsm' is moved to S_ACTIVE if it is in any other state, and
+ * 'now' is recorded as the time of the last activity. */
void
-reconnect_received(struct reconnect *fsm, long long int now)
+reconnect_activity(struct reconnect *fsm, long long int now)
{
if (fsm->state != S_ACTIVE) {
reconnect_transition__(fsm, now, S_ACTIVE);
}
- fsm->last_received = now;
+ fsm->last_activity = now;
}
static void
reconnect_transition__(struct reconnect *fsm, long long int now,
enum state state)
{
- if (fsm->state == S_CONNECT_IN_PROGRESS) {
+ if (fsm->state == S_CONNECTING) {
fsm->n_attempted_connections++;
if (state == S_ACTIVE) {
fsm->n_successful_connections++;
static long long int
reconnect_deadline__(const struct reconnect *fsm)
{
- assert(fsm->state_entered != LLONG_MIN);
+ ovs_assert(fsm->state_entered != LLONG_MIN);
switch (fsm->state) {
case S_VOID:
+ case S_LISTENING:
return LLONG_MAX;
case S_BACKOFF:
return fsm->state_entered + fsm->backoff;
- case S_START_CONNECT:
- case S_CONNECT_IN_PROGRESS:
+ case S_CONNECTING:
return fsm->state_entered + MAX(1000, fsm->backoff);
case S_ACTIVE:
if (fsm->probe_interval) {
- long long int base = MAX(fsm->last_received, fsm->state_entered);
+ long long int base = MAX(fsm->last_activity, fsm->state_entered);
return base + fsm->probe_interval;
}
return LLONG_MAX;
case S_IDLE:
- return fsm->state_entered + fsm->probe_interval;
+ if (fsm->probe_interval) {
+ return fsm->state_entered + fsm->probe_interval;
+ }
+ return LLONG_MAX;
case S_RECONNECT:
return fsm->state_entered;
*
* - 0: The client need not take any action.
*
- * - RECONNECT_CONNECT: The client should start a connection attempt and
- * indicate this by calling reconnect_connecting(). If the connection
- * attempt has definitely succeeded, it should call
+ * - Active client, RECONNECT_CONNECT: The client should start a connection
+ * attempt and indicate this by calling reconnect_connecting(). If the
+ * connection attempt has definitely succeeded, it should call
* reconnect_connected(). If the connection attempt has definitely
* failed, it should call reconnect_connect_failed().
*
* (e.g. connect()) even if the connection might soon abort due to a
* failure at a high-level (e.g. SSL negotiation failure).
*
+ * - Passive client, RECONNECT_CONNECT: The client should try to listen for
+ * a connection, if it is not already listening. It should call
+ * reconnect_listening() if successful, otherwise reconnect_connecting()
+ * or reconnect_connect_failed() if the attempt is in progress or
+ * definitely failed, respectively.
+ *
+ * A listening passive client should constantly attempt to accept a new
+ * connection and report an accepted connection with
+ * reconnect_connected().
+ *
* - RECONNECT_DISCONNECT: The client should abort the current connection
- * or connection attempt and call reconnect_disconnected() or
- * reconnect_connect_failed() to indicate it.
+ * or connection attempt or listen attempt and call
+ * reconnect_disconnected() or reconnect_connect_failed() to indicate it.
*
* - RECONNECT_PROBE: The client should send some kind of request to the
* peer that will elicit a response, to ensure that the connection is
case S_BACKOFF:
return RECONNECT_CONNECT;
- case S_START_CONNECT:
- case S_CONNECT_IN_PROGRESS:
+ case S_CONNECTING:
return RECONNECT_DISCONNECT;
case S_ACTIVE:
VLOG_DBG("%s: idle %lld ms, sending inactivity probe", fsm->name,
- now - MAX(fsm->last_received, fsm->state_entered));
+ now - MAX(fsm->last_activity, fsm->state_entered));
reconnect_transition__(fsm, now, S_IDLE);
return RECONNECT_PROBE;
case S_RECONNECT:
return RECONNECT_DISCONNECT;
+
+ case S_LISTENING:
+ return 0;
}
NOT_REACHED();
} else {
- return fsm->state == S_START_CONNECT ? RECONNECT_CONNECT : 0;
+ return 0;
}
}
return is_connected_state(fsm->state);
}
-/* Returns the number of milliseconds for which 'fsm' has been continuously
- * connected to its peer. (If 'fsm' is not currently connected, this is 0.) */
+/* Returns the number of milliseconds since 'fsm' last successfully connected
+ * to its peer (even if it has since disconnected). Returns UINT_MAX if never
+ * connected.
+ *
+ * "Never connected" is detected by 'last_connected' still holding LLONG_MAX.
+ * Otherwise the result is 'now' minus 'last_connected', converted to
+ * unsigned int on return. */
+unsigned int
+reconnect_get_last_connect_elapsed(const struct reconnect *fsm,
+ long long int now)
+{
+ return fsm->last_connected == LLONG_MAX ? UINT_MAX
+ : now - fsm->last_connected;
+}
+
+/* Returns the number of milliseconds since 'fsm' last disconnected
+ * from its peer (even if it has since reconnected). Returns UINT_MAX if never
+ * disconnected.
+ *
+ * "Never disconnected" is detected by 'last_disconnected' still holding
+ * LLONG_MAX. Otherwise the result is 'now' minus 'last_disconnected',
+ * converted to unsigned int on return. */
unsigned int
-reconnect_get_connection_duration(const struct reconnect *fsm,
- long long int now)
+reconnect_get_last_disconnect_elapsed(const struct reconnect *fsm,
+ long long int now)
{
- return reconnect_is_connected(fsm) ? now - fsm->last_connected : 0;
+ return fsm->last_disconnected == LLONG_MAX ? UINT_MAX
+ : now - fsm->last_disconnected;
}
/* Copies various statistics for 'fsm' into '*stats'. */
struct reconnect_stats *stats)
{
stats->creation_time = fsm->creation_time;
- stats->last_received = fsm->last_received;
+ stats->last_activity = fsm->last_activity;
stats->last_connected = fsm->last_connected;
+ stats->last_disconnected = fsm->last_disconnected;
stats->backoff = fsm->backoff;
stats->seqno = fsm->seqno;
stats->is_connected = reconnect_is_connected(fsm);
- stats->current_connection_duration
- = reconnect_get_connection_duration(fsm, now);
- stats->total_connected_duration = (stats->current_connection_duration
- + fsm->total_connected_duration);
+ stats->msec_since_connect
+ = reconnect_get_last_connect_elapsed(fsm, now);
+ stats->msec_since_disconnect
+ = reconnect_get_last_disconnect_elapsed(fsm, now);
+ stats->total_connected_duration = fsm->total_connected_duration
+ + (is_connected_state(fsm->state)
+ ? reconnect_get_last_connect_elapsed(fsm, now) : 0);
stats->n_attempted_connections = fsm->n_attempted_connections;
stats->n_successful_connections = fsm->n_successful_connections;
stats->state = reconnect_state_name__(fsm->state);