X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Freconnect.c;h=5296c5cbe460e0170823d011ebce1307b6360260;hb=28c5588e8e1a8d091c5d2275232c35f2968a97fa;hp=fadeeb89b20a834522093786698cc45ef5d6a65c;hpb=3ed497fc10033c9857140270d60ef6aa2d7c0c08;p=sliver-openvswitch.git diff --git a/lib/reconnect.c b/lib/reconnect.c index fadeeb89b..5296c5cbe 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,21 +17,21 @@ #include #include "reconnect.h" -#include #include #include "poll-loop.h" - -#define THIS_MODULE VLM_reconnect #include "vlog.h" +VLOG_DEFINE_THIS_MODULE(reconnect); + #define STATES \ STATE(VOID, 1 << 0) \ STATE(BACKOFF, 1 << 1) \ - STATE(CONNECTING, 1 << 2) \ - STATE(ACTIVE, 1 << 3) \ - STATE(IDLE, 1 << 4) \ - STATE(RECONNECT, 1 << 5) + STATE(CONNECTING, 1 << 3) \ + STATE(ACTIVE, 1 << 4) \ + STATE(IDLE, 1 << 5) \ + STATE(RECONNECT, 1 << 6) \ + STATE(LISTENING, 1 << 7) enum state { #define STATE(NAME, VALUE) S_##NAME = VALUE, STATES @@ -50,13 +50,17 @@ struct reconnect { int min_backoff; int max_backoff; int probe_interval; + bool passive; + enum vlog_level info; /* Used for informational messages. */ /* State. */ enum state state; long long int state_entered; int backoff; - long long int last_received; + long long int last_activity; long long int last_connected; + long long int last_disconnected; + unsigned int max_tries; /* These values are simply for statistics reporting, not otherwise used * directly by anything internal. */ @@ -69,6 +73,7 @@ struct reconnect { static void reconnect_transition__(struct reconnect *, long long int now, enum state state); static long long int reconnect_deadline__(const struct reconnect *); +static bool reconnect_may_retry(struct reconnect *); static const char * reconnect_state_name__(enum state state) @@ -90,15 +95,19 @@ reconnect_create(long long int now) struct reconnect *fsm = xzalloc(sizeof *fsm); fsm->name = xstrdup("void"); - fsm->min_backoff = 1000; - fsm->max_backoff = 8000; - fsm->probe_interval = 5000; + fsm->min_backoff = RECONNECT_DEFAULT_MIN_BACKOFF; + fsm->max_backoff = RECONNECT_DEFAULT_MAX_BACKOFF; + fsm->probe_interval = RECONNECT_DEFAULT_PROBE_INTERVAL; + fsm->passive = false; + fsm->info = VLL_INFO; fsm->state = S_VOID; fsm->state_entered = now; fsm->backoff = 0; - fsm->last_received = now; - fsm->last_connected = now; + fsm->last_activity = now; + fsm->last_connected = LLONG_MAX; + fsm->last_disconnected = LLONG_MAX; + fsm->max_tries = UINT_MAX; fsm->creation_time = now; return fsm; @@ -114,6 +123,22 @@ reconnect_destroy(struct reconnect *fsm) } } +/* If 'quiet' is true, 'fsm' will log informational messages at level VLL_DBG, + * by default keeping them out of log files. This is appropriate if the + * connection is one that is expected to be short-lived, so that the log + * messages are merely distracting. + * + * If 'quiet' is false, 'fsm' logs informational messages at level VLL_INFO. + * This is the default. + * + * This setting has no effect on the log level of debugging, warning, or error + * messages. */ +void +reconnect_set_quiet(struct reconnect *fsm, bool quiet) +{ + fsm->info = quiet ? VLL_DBG : VLL_INFO; +} + /* Returns 'fsm''s name. */ const char * reconnect_get_name(const struct reconnect *fsm) @@ -133,7 +158,7 @@ reconnect_set_name(struct reconnect *fsm, const char *name) } /* Return the minimum number of milliseconds to back off between consecutive - * connection attempts. The default is 1000 ms. */ + * connection attempts. The default is RECONNECT_DEFAULT_MIN_BACKOFF. */ int reconnect_get_min_backoff(const struct reconnect *fsm) { @@ -141,7 +166,7 @@ reconnect_get_min_backoff(const struct reconnect *fsm) } /* Return the maximum number of milliseconds to back off between consecutive - * connection attempts. The default is 8000 ms. */ + * connection attempts. The default is RECONNECT_DEFAULT_MAX_BACKOFF. */ int reconnect_get_max_backoff(const struct reconnect *fsm) { @@ -150,9 +175,9 @@ reconnect_get_max_backoff(const struct reconnect *fsm) /* Returns the "probe interval" for 'fsm' in milliseconds. If this is zero, it * disables the connection keepalive feature. If it is nonzero, then if the - * interval passes while 'fsm' is connected and without reconnect_received() + * interval passes while 'fsm' is connected and without reconnect_activity() * being called for 'fsm', reconnect_run() returns RECONNECT_PROBE. If the - * interval passes again without reconnect_received() being called, + * interval passes again without reconnect_activity() being called, * reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. */ int reconnect_get_probe_interval(const struct reconnect *fsm) @@ -160,17 +185,42 @@ reconnect_get_probe_interval(const struct reconnect *fsm) return fsm->probe_interval; } +/* Limits the maximum number of times that 'fsm' will ask the client to try to + * reconnect to 'max_tries'. UINT_MAX (the default) means an unlimited number + * of tries. + * + * After the number of tries has expired, the 'fsm' will disable itself + * instead of backing off and retrying. */ +void +reconnect_set_max_tries(struct reconnect *fsm, unsigned int max_tries) +{ + fsm->max_tries = max_tries; +} + +/* Returns the current remaining number of connection attempts, UINT_MAX if + * the number is unlimited. */ +unsigned int +reconnect_get_max_tries(struct reconnect *fsm) +{ + return fsm->max_tries; +} + /* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum * number of milliseconds, and 'max_backoff' is the maximum, between connection - * attempts. + * attempts. The current backoff is also the duration that 'fsm' is willing to + * wait for a given connection to succeed or fail. * * 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than - * or equal to 'min_backoff'. */ + * or equal to 'min_backoff'. + * + * Pass 0 for 'min_backoff' or 'max_backoff' or both to use the defaults. */ void reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff) { fsm->min_backoff = MAX(min_backoff, 1000); - fsm->max_backoff = max_backoff ? MAX(max_backoff, 1000) : 8000; + fsm->max_backoff = (max_backoff + ? MAX(max_backoff, 1000) + : RECONNECT_DEFAULT_MAX_BACKOFF); if (fsm->min_backoff > fsm->max_backoff) { fsm->max_backoff = fsm->min_backoff; } @@ -183,8 +233,8 @@ reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff) /* Sets the "probe interval" for 'fsm' to 'probe_interval', in milliseconds. * If this is zero, it disables the connection keepalive feature. If it is * nonzero, then if the interval passes while 'fsm' is connected and without - * reconnect_received() being called for 'fsm', reconnect_run() returns - * RECONNECT_PROBE. If the interval passes again without reconnect_received() + * reconnect_activity() being called for 'fsm', reconnect_run() returns + * RECONNECT_PROBE. If the interval passes again without reconnect_activity() * being called, reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. * * If 'probe_interval' is nonzero, then it will be forced to a value of at @@ -195,6 +245,32 @@ reconnect_set_probe_interval(struct reconnect *fsm, int probe_interval) fsm->probe_interval = probe_interval ? MAX(1000, probe_interval) : 0; } +/* Returns true if 'fsm' is in passive mode, false if 'fsm' is in active mode + * (the default). */ +bool +reconnect_is_passive(const struct reconnect *fsm) +{ + return fsm->passive; +} + +/* Configures 'fsm' for active or passive mode. In active mode (the default), + * the FSM is attempting to connect to a remote host. In passive mode, the FSM + * is listening for connections from a remote host. */ +void +reconnect_set_passive(struct reconnect *fsm, bool passive, long long int now) +{ + if (fsm->passive != passive) { + fsm->passive = passive; + + if (passive + ? fsm->state & (S_CONNECTING | S_RECONNECT) + : fsm->state == S_LISTENING && reconnect_may_retry(fsm)) { + reconnect_transition__(fsm, now, S_BACKOFF); + fsm->backoff = 0; + } + } +} + /* Returns true if 'fsm' has been enabled with reconnect_enable(). Calling * another function that indicates a change in connection state, such as * reconnect_disconnected() or reconnect_force_reconnect(), will also enable @@ -213,7 +289,7 @@ reconnect_is_enabled(const struct reconnect *fsm) void reconnect_enable(struct reconnect *fsm, long long int now) { - if (fsm->state == S_VOID) { + if (fsm->state == S_VOID && reconnect_may_retry(fsm)) { reconnect_transition__(fsm, now, S_BACKOFF); fsm->backoff = 0; } @@ -250,30 +326,43 @@ reconnect_force_reconnect(struct reconnect *fsm, long long int now) void reconnect_disconnected(struct reconnect *fsm, long long int now, int error) { - if (fsm->state != S_BACKOFF) { + if (!(fsm->state & (S_BACKOFF | S_VOID))) { /* Report what happened. */ if (fsm->state & (S_ACTIVE | S_IDLE)) { if (error > 0) { VLOG_WARN("%s: connection dropped (%s)", - fsm->name, strerror(error)); + fsm->name, ovs_strerror(error)); } else if (error == EOF) { - VLOG_INFO("%s: connection closed by peer", fsm->name); + VLOG(fsm->info, "%s: connection closed by peer", fsm->name); + } else { + VLOG(fsm->info, "%s: connection dropped", fsm->name); + } + } else if (fsm->state == S_LISTENING) { + if (error > 0) { + VLOG_WARN("%s: error listening for connections (%s)", + fsm->name, ovs_strerror(error)); } else { - VLOG_INFO("%s: connection dropped", fsm->name); + VLOG(fsm->info, "%s: error listening for connections", + fsm->name); } } else { + const char *type = fsm->passive ? "listen" : "connection"; if (error > 0) { - VLOG_WARN("%s: connection attempt failed (%s)", - fsm->name, strerror(error)); + VLOG_INFO("%s: %s attempt failed (%s)", + fsm->name, type, ovs_strerror(error)); } else { - VLOG_INFO("%s: connection attempt timed out", fsm->name); + VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type); } } + if (fsm->state & (S_ACTIVE | S_IDLE)) { + fsm->last_disconnected = now; + } /* Back off. */ if (fsm->state & (S_ACTIVE | S_IDLE) - && fsm->last_received - fsm->last_connected >= fsm->backoff) { - fsm->backoff = fsm->min_backoff; + && (fsm->last_activity - fsm->last_connected >= fsm->backoff + || fsm->passive)) { + fsm->backoff = fsm->passive ? 0 : fsm->min_backoff; } else { if (fsm->backoff < fsm->min_backoff) { fsm->backoff = fsm->min_backoff; @@ -282,30 +371,77 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error) } else { fsm->backoff *= 2; } - VLOG_INFO("%s: waiting %.3g seconds before reconnect\n", - fsm->name, fsm->backoff / 1000.0); + if (fsm->passive) { + VLOG(fsm->info, "%s: waiting %.3g seconds before trying to " + "listen again", fsm->name, fsm->backoff / 1000.0); + } else { + VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect", + fsm->name, fsm->backoff / 1000.0); + } } - reconnect_transition__(fsm, now, S_BACKOFF); + + reconnect_transition__(fsm, now, + reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID); } } -/* Tell 'fsm' that a connection attempt is in progress. +/* Tell 'fsm' that a connection or listening attempt is in progress. * - * The FSM will start a timer, after which the connection attempt will be - * aborted (by returning RECONNECT_DISCONNECT from reconect_run()). */ + * The FSM will start a timer, after which the connection or listening attempt + * will be aborted (by returning RECONNECT_DISCONNECT from + * reconnect_run()). */ void reconnect_connecting(struct reconnect *fsm, long long int now) { if (fsm->state != S_CONNECTING) { - VLOG_INFO("%s: connecting...", fsm->name); + if (fsm->passive) { + VLOG(fsm->info, "%s: listening...", fsm->name); + } else { + VLOG(fsm->info, "%s: connecting...", fsm->name); + } reconnect_transition__(fsm, now, S_CONNECTING); } } +/* Tell 'fsm' that the client is listening for connection attempts. This state + * last indefinitely until the client reports some change. + * + * The natural progression from this state is for the client to report that a + * connection has been accepted or is in progress of being accepted, by calling + * reconnect_connecting() or reconnect_connected(). + * + * The client may also report that listening failed (e.g. accept() returned an + * unexpected error such as ENOMEM) by calling reconnect_listen_error(), in + * which case the FSM will back off and eventually return RECONNECT_CONNECT + * from reconnect_run() to tell the client to try listening again. */ +void +reconnect_listening(struct reconnect *fsm, long long int now) +{ + if (fsm->state != S_LISTENING) { + VLOG(fsm->info, "%s: listening...", fsm->name); + reconnect_transition__(fsm, now, S_LISTENING); + } +} + +/* Tell 'fsm' that the client's attempt to accept a connection failed + * (e.g. accept() returned an unexpected error such as ENOMEM). + * + * If the FSM is currently listening (reconnect_listening() was called), it + * will back off and eventually return RECONNECT_CONNECT from reconnect_run() + * to tell the client to try listening again. If there is an active + * connection, this will be delayed until that connection drops. */ +void +reconnect_listen_error(struct reconnect *fsm, long long int now, int error) +{ + if (fsm->state == S_LISTENING) { + reconnect_disconnected(fsm, now, error); + } +} + /* Tell 'fsm' that the connection was successful. * * The FSM will start the probe interval timer, which is reset by - * reconnect_received(). If the timer expires, a probe will be sent (by + * reconnect_activity(). If the timer expires, a probe will be sent (by * returning RECONNECT_PROBE from reconnect_run()). If the timer expires * again without being reset, the connection will be aborted (by returning * RECONNECT_DISCONNECT from reconnect_run()). */ @@ -315,7 +451,7 @@ reconnect_connected(struct reconnect *fsm, long long int now) if (!is_connected_state(fsm->state)) { reconnect_connecting(fsm, now); - VLOG_INFO("%s: connected", fsm->name); + VLOG(fsm->info, "%s: connected", fsm->name); reconnect_transition__(fsm, now, S_ACTIVE); fsm->last_connected = now; } @@ -331,15 +467,15 @@ reconnect_connect_failed(struct reconnect *fsm, long long int now, int error) reconnect_disconnected(fsm, now, error); } -/* Tell 'fsm' that some data was received. This resets the probe interval - * timer, so that the connection is known not to be idle. */ +/* Tell 'fsm' that some activity has occurred on the connection. This resets + * the probe interval timer, so that the connection is known not to be idle. */ void -reconnect_received(struct reconnect *fsm, long long int now) +reconnect_activity(struct reconnect *fsm, long long int now) { if (fsm->state != S_ACTIVE) { reconnect_transition__(fsm, now, S_ACTIVE); } - fsm->last_received = now; + fsm->last_activity = now; } static void @@ -367,9 +503,10 @@ reconnect_transition__(struct reconnect *fsm, long long int now, static long long int reconnect_deadline__(const struct reconnect *fsm) { - assert(fsm->state_entered != LLONG_MIN); + ovs_assert(fsm->state_entered != LLONG_MIN); switch (fsm->state) { case S_VOID: + case S_LISTENING: return LLONG_MAX; case S_BACKOFF: @@ -380,19 +517,22 @@ reconnect_deadline__(const struct reconnect *fsm) case S_ACTIVE: if (fsm->probe_interval) { - long long int base = MAX(fsm->last_received, fsm->state_entered); + long long int base = MAX(fsm->last_activity, fsm->state_entered); return base + fsm->probe_interval; } return LLONG_MAX; case S_IDLE: - return fsm->state_entered + fsm->probe_interval; + if (fsm->probe_interval) { + return fsm->state_entered + fsm->probe_interval; + } + return LLONG_MAX; case S_RECONNECT: return fsm->state_entered; } - NOT_REACHED(); + OVS_NOT_REACHED(); } /* Assesses whether any action should be taken on 'fsm'. The return value is @@ -400,9 +540,9 @@ reconnect_deadline__(const struct reconnect *fsm) * * - 0: The client need not take any action. * - * - RECONNECT_CONNECT: The client should start a connection attempt and - * indicate this by calling reconnect_connecting(). If the connection - * attempt has definitely succeeded, it should call + * - Active client, RECONNECT_CONNECT: The client should start a connection + * attempt and indicate this by calling reconnect_connecting(). If the + * connection attempt has definitely succeeded, it should call * reconnect_connected(). If the connection attempt has definitely * failed, it should call reconnect_connect_failed(). * @@ -412,9 +552,19 @@ reconnect_deadline__(const struct reconnect *fsm) * (e.g. connect()) even if the connection might soon abort due to a * failure at a high-level (e.g. SSL negotiation failure). * + * - Passive client, RECONNECT_CONNECT: The client should try to listen for + * a connection, if it is not already listening. It should call + * reconnect_listening() if successful, otherwise reconnect_connecting() + * or reconnected_connect_failed() if the attempt is in progress or + * definitely failed, respectively. + * + * A listening passive client should constantly attempt to accept a new + * connection and report an accepted connection with + * reconnect_connected(). + * * - RECONNECT_DISCONNECT: The client should abort the current connection - * or connection attempt and call reconnect_disconnected() or - * reconnect_connect_failed() to indicate it. + * or connection attempt or listen attempt and call + * reconnect_disconnected() or reconnect_connect_failed() to indicate it. * * - RECONNECT_PROBE: The client should send some kind of request to the * peer that will elicit a response, to ensure that the connection is @@ -437,7 +587,7 @@ reconnect_run(struct reconnect *fsm, long long int now) case S_ACTIVE: VLOG_DBG("%s: idle %lld ms, sending inactivity probe", fsm->name, - now - MAX(fsm->last_received, fsm->state_entered)); + now - MAX(fsm->last_activity, fsm->state_entered)); reconnect_transition__(fsm, now, S_IDLE); return RECONNECT_PROBE; @@ -449,11 +599,14 @@ reconnect_run(struct reconnect *fsm, long long int now) case S_RECONNECT: return RECONNECT_DISCONNECT; + + case S_LISTENING: + return 0; } - NOT_REACHED(); + OVS_NOT_REACHED(); } else { - return fsm->state == S_CONNECTING ? RECONNECT_CONNECT : 0; + return 0; } } @@ -492,13 +645,26 @@ reconnect_is_connected(const struct reconnect *fsm) return is_connected_state(fsm->state); } -/* Returns the number of milliseconds for which 'fsm' has been continuously - * connected to its peer. (If 'fsm' is not currently connected, this is 0.) */ +/* Returns the number of milliseconds since 'fsm' last successfully connected + * to its peer (even if it has since disconnected). Returns UINT_MAX if never + * connected. */ unsigned int -reconnect_get_connection_duration(const struct reconnect *fsm, - long long int now) +reconnect_get_last_connect_elapsed(const struct reconnect *fsm, + long long int now) { - return reconnect_is_connected(fsm) ? now - fsm->last_connected : 0; + return fsm->last_connected == LLONG_MAX ? UINT_MAX + : now - fsm->last_connected; +} + +/* Returns the number of milliseconds since 'fsm' last disconnected + * from its peer (even if it has since reconnected). Returns UINT_MAX if never + * disconnected. */ +unsigned int +reconnect_get_last_disconnect_elapsed(const struct reconnect *fsm, + long long int now) +{ + return fsm->last_disconnected == LLONG_MAX ? UINT_MAX + : now - fsm->last_disconnected; } /* Copies various statistics for 'fsm' into '*stats'. */ @@ -507,17 +673,31 @@ reconnect_get_stats(const struct reconnect *fsm, long long int now, struct reconnect_stats *stats) { stats->creation_time = fsm->creation_time; - stats->last_received = fsm->last_received; + stats->last_activity = fsm->last_activity; stats->last_connected = fsm->last_connected; + stats->last_disconnected = fsm->last_disconnected; stats->backoff = fsm->backoff; stats->seqno = fsm->seqno; stats->is_connected = reconnect_is_connected(fsm); - stats->current_connection_duration - = reconnect_get_connection_duration(fsm, now); - stats->total_connected_duration = (stats->current_connection_duration - + fsm->total_connected_duration); + stats->msec_since_connect + = reconnect_get_last_connect_elapsed(fsm, now); + stats->msec_since_disconnect + = reconnect_get_last_disconnect_elapsed(fsm, now); + stats->total_connected_duration = fsm->total_connected_duration + + (is_connected_state(fsm->state) + ? reconnect_get_last_connect_elapsed(fsm, now) : 0); stats->n_attempted_connections = fsm->n_attempted_connections; stats->n_successful_connections = fsm->n_successful_connections; stats->state = reconnect_state_name__(fsm->state); stats->state_elapsed = now - fsm->state_entered; } + +static bool +reconnect_may_retry(struct reconnect *fsm) +{ + bool may_retry = fsm->max_tries > 0; + if (may_retry && fsm->max_tries != UINT_MAX) { + fsm->max_tries--; + } + return may_retry; +}