X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Frconn.c;h=1b69b8f6033e30f4b17575099ad9ca32a46fa767;hb=53319f95b9d3d025917110e2688a2cf0616f7071;hp=e7685bb61426d864cf17e77b60e3dcdd15eac047;hpb=fa37b408eac875cbc0d7adbbb3f7a004371172da;p=sliver-openvswitch.git diff --git a/lib/rconn.c b/lib/rconn.c index e7685bb61..1b69b8f60 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,10 +30,15 @@ #include "timeval.h" #include "util.h" #include "vconn.h" - -#define THIS_MODULE VLM_rconn #include "vlog.h" +VLOG_DEFINE_THIS_MODULE(rconn); + +COVERAGE_DEFINE(rconn_discarded); +COVERAGE_DEFINE(rconn_overflow); +COVERAGE_DEFINE(rconn_queued); +COVERAGE_DEFINE(rconn_sent); + #define STATES \ STATE(VOID, 1 << 0) \ STATE(BACKOFF, 1 << 1) \ @@ -69,13 +74,14 @@ struct rconn { char *target; /* vconn name, passed to vconn_open(). */ bool reliable; - struct ovs_queue txq; + struct list txq; /* Contains "struct ofpbuf"s. */ int backoff; int max_backoff; time_t backoff_deadline; time_t last_received; time_t last_connected; + time_t last_disconnected; unsigned int packets_sent; unsigned int seqno; int last_error; @@ -98,16 +104,6 @@ struct rconn { time_t creation_time; unsigned long int total_time_connected; - /* If we can't connect to the peer, it could be for any number of reasons. - * Usually, one would assume it is because the peer is not running or - * because the network is partitioned. But it could also be because the - * network topology has changed, in which case the upper layer will need to - * reassess it (in particular, obtain a new IP address via DHCP and find - * the new location of the controller). We set this flag when we suspect - * that this could be the case. */ - bool questionable_connectivity; - time_t last_questioned; - /* Throughout this file, "probe" is shorthand for "inactivity probe". * When nothing has been received from the peer for a while, we send out * an echo request as an inactivity probe packet. We should receive back @@ -123,8 +119,8 @@ struct rconn { * * We don't cache the local port, because that changes from one connection * attempt to the next. */ - uint32_t local_ip, remote_ip; - uint16_t remote_port; + ovs_be32 local_ip, remote_ip; + ovs_be16 remote_port; /* Messages sent or received are copied to the monitor connections. */ #define MAX_MONITORS 8 @@ -143,10 +139,10 @@ static void reconnect(struct rconn *); static void report_error(struct rconn *, int error); static void disconnect(struct rconn *, int error); static void flush_queue(struct rconn *); -static void question_connectivity(struct rconn *); static void copy_to_monitor(struct rconn *, const struct ofpbuf *); static bool is_connected_state(enum state); static bool is_admitted_msg(const struct ofpbuf *); +static bool rconn_logging_connection_attempts__(const struct rconn *); /* Creates and returns a new rconn. * @@ -176,13 +172,14 @@ rconn_create(int probe_interval, int max_backoff) rc->target = xstrdup("void"); rc->reliable = false; - queue_init(&rc->txq); + list_init(&rc->txq); rc->backoff = 0; rc->max_backoff = max_backoff ? max_backoff : 8; rc->backoff_deadline = TIME_MIN; rc->last_received = time_now(); - rc->last_connected = time_now(); + rc->last_connected = TIME_MIN; + rc->last_disconnected = TIME_MIN; rc->seqno = 0; rc->packets_sent = 0; @@ -196,9 +193,6 @@ rconn_create(int probe_interval, int max_backoff) rc->creation_time = time_now(); rc->total_time_connected = 0; - rc->questionable_connectivity = false; - rc->last_questioned = time_now(); - rconn_set_probe_interval(rc, probe_interval); rc->n_monitors = 0; @@ -312,7 +306,7 @@ rconn_destroy(struct rconn *rc) free(rc->target); vconn_close(rc->vconn); flush_queue(rc); - queue_destroy(&rc->txq); + ofpbuf_list_delete(&rc->txq); for (i = 0; i < rc->n_monitors; i++) { vconn_close(rc->monitors[i]); } @@ -337,7 +331,9 @@ reconnect(struct rconn *rc) { int retval; - VLOG_INFO("%s: connecting...", rc->name); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connecting...", rc->name); + } rc->n_attempted_connections++; retval = vconn_open(rc->target, OFP_VERSION, &rc->vconn); if (!retval) { @@ -383,10 +379,15 @@ run_CONNECTING(struct rconn *rc) state_transition(rc, S_ACTIVE); rc->last_connected = rc->state_entered; } else if (retval != EAGAIN) { - VLOG_INFO("%s: connection failed (%s)", rc->name, strerror(retval)); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connection failed (%s)", + rc->name, strerror(retval)); + } disconnect(rc, retval); } else if (timed_out(rc)) { - VLOG_INFO("%s: connection timed out", rc->name); + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: connection timed out", rc->name); + } rc->backoff_deadline = TIME_MAX; /* Prevent resetting backoff. */ disconnect(rc, ETIMEDOUT); } @@ -395,16 +396,16 @@ run_CONNECTING(struct rconn *rc) static void do_tx_work(struct rconn *rc) { - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { return; } - while (rc->txq.n > 0) { + while (!list_is_empty(&rc->txq)) { int error = try_send(rc); if (error) { break; } } - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { poll_immediate_wake(); } } @@ -449,7 +450,6 @@ static void run_IDLE(struct rconn *rc) { if (timed_out(rc)) { - question_connectivity(rc); VLOG_ERR("%s: no response to inactivity probe after %u " "seconds, disconnecting", rc->name, elapsed_in_this_state(rc)); @@ -497,6 +497,9 @@ rconn_run_wait(struct rconn *rc) if (rc->vconn) { vconn_run_wait(rc->vconn); + if ((rc->state & (S_ACTIVE | S_IDLE)) && !list_is_empty(&rc->txq)) { + vconn_wait(rc->vconn, WAIT_SEND); + } } for (i = 0; i < rc->n_monitors; i++) { vconn_run_wait(rc->monitors[i]); @@ -507,10 +510,6 @@ rconn_run_wait(struct rconn *rc) long long int expires = sat_add(rc->state_entered, timeo); poll_timer_wait_until(expires * 1000); } - - if ((rc->state & (S_ACTIVE | S_IDLE)) && rc->txq.n) { - vconn_wait(rc->vconn, WAIT_SEND); - } } /* Attempts to receive a packet from 'rc'. If successful, returns the packet; @@ -577,13 +576,13 @@ rconn_send(struct rconn *rc, struct ofpbuf *b, if (counter) { rconn_packet_counter_inc(counter); } - queue_push_tail(&rc->txq, b); + list_push_back(&rc->txq, &b->list_node); /* If the queue was empty before we added 'b', try to send some * packets. (But if the queue had packets in it, it's because the * vconn is backlogged and there's no point in stuffing more into it * now. We'll get back to that in rconn_run().) */ - if (rc->txq.n == 1) { + if (rc->txq.next == &b->list_node) { try_send(rc); } return 0; @@ -701,53 +700,37 @@ rconn_failure_duration(const struct rconn *rconn) /* Returns the IP address of the peer, or 0 if the peer's IP address is not * known. */ -uint32_t -rconn_get_remote_ip(const struct rconn *rconn) +ovs_be32 +rconn_get_remote_ip(const struct rconn *rconn) { return rconn->remote_ip; } /* Returns the transport port of the peer, or 0 if the peer's port is not * known. */ -uint16_t -rconn_get_remote_port(const struct rconn *rconn) +ovs_be16 +rconn_get_remote_port(const struct rconn *rconn) { return rconn->remote_port; } /* Returns the IP address used to connect to the peer, or 0 if the - * connection is not an IP-based protocol or if its IP address is not + * connection is not an IP-based protocol or if its IP address is not * known. */ -uint32_t -rconn_get_local_ip(const struct rconn *rconn) +ovs_be32 +rconn_get_local_ip(const struct rconn *rconn) { return rconn->local_ip; } /* Returns the transport port used to connect to the peer, or 0 if the * connection does not contain a port or if the port is not known. */ -uint16_t -rconn_get_local_port(const struct rconn *rconn) +ovs_be16 +rconn_get_local_port(const struct rconn *rconn) { return rconn->vconn ? vconn_get_local_port(rconn->vconn) : 0; } -/* If 'rconn' can't connect to the peer, it could be for any number of reasons. - * Usually, one would assume it is because the peer is not running or because - * the network is partitioned. But it could also be because the network - * topology has changed, in which case the upper layer will need to reassess it - * (in particular, obtain a new IP address via DHCP and find the new location - * of the controller). When this appears that this might be the case, this - * function returns true. It also clears the questionability flag and prevents - * it from being set again for some time. */ -bool -rconn_is_connectivity_questionable(struct rconn *rconn) -{ - bool questionable = rconn->questionable_connectivity; - rconn->questionable_connectivity = false; - return questionable; -} - /* Returns the total number of packets successfully received by the underlying * vconn. */ unsigned int @@ -780,13 +763,21 @@ rconn_get_successful_connections(const struct rconn *rc) } /* Returns the time at which the last successful connection was made by - * 'rc'. */ + * 'rc'. Returns TIME_MIN if never connected. */ time_t rconn_get_last_connection(const struct rconn *rc) { return rc->last_connected; } +/* Returns the time at which 'rc' was last disconnected. Returns TIME_MIN + * if never disconnected. */ +time_t +rconn_get_last_disconnect(const struct rconn *rc) +{ + return rc->last_disconnected; +} + /* Returns the time at which the last OpenFlow message was received by 'rc'. * If no packets have been received on 'rc', returns the time at which 'rc' * was created. */ @@ -907,11 +898,18 @@ rconn_set_target__(struct rconn *rc, const char *target, const char *name) static int try_send(struct rconn *rc) { - int retval = 0; - struct ofpbuf *next = rc->txq.head->next; - struct rconn_packet_counter *counter = rc->txq.head->private_p; - retval = vconn_send(rc->vconn, rc->txq.head); + struct ofpbuf *msg = ofpbuf_from_list(rc->txq.next); + struct rconn_packet_counter *counter = msg->private_p; + int retval; + + /* Eagerly remove 'msg' from the txq. We can't remove it from the list + * after sending, if sending is successful, because it is then owned by the + * vconn, which might have freed it already. */ + list_remove(&msg->list_node); + + retval = vconn_send(rc->vconn, msg); if (retval) { + list_push_front(&rc->txq, &msg->list_node); if (retval != EAGAIN) { report_error(rc, retval); disconnect(rc, retval); @@ -923,7 +921,6 @@ try_send(struct rconn *rc) if (counter) { rconn_packet_counter_dec(counter); } - queue_advance_head(&rc->txq, next); return 0; } @@ -962,6 +959,7 @@ disconnect(struct rconn *rc, int error) time_t now = time_now(); if (rc->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) { + rc->last_disconnected = now; vconn_close(rc->vconn); rc->vconn = NULL; flush_queue(rc); @@ -969,17 +967,22 @@ disconnect(struct rconn *rc, int error) if (now >= rc->backoff_deadline) { rc->backoff = 1; - } else { - rc->backoff = MIN(rc->max_backoff, MAX(1, 2 * rc->backoff)); - VLOG_INFO("%s: waiting %d seconds before reconnect\n", + } else if (rc->backoff < rc->max_backoff / 2) { + rc->backoff = MAX(1, 2 * rc->backoff); + VLOG_INFO("%s: waiting %d seconds before reconnect", rc->name, rc->backoff); + } else { + if (rconn_logging_connection_attempts__(rc)) { + VLOG_INFO("%s: continuing to retry connections in the " + "background but suppressing further logging", + rc->name); + } + rc->backoff = rc->max_backoff; } rc->backoff_deadline = now + rc->backoff; state_transition(rc, S_BACKOFF); - if (now - rc->last_connected > 60) { - question_connectivity(rc); - } } else { + rc->last_disconnected = time_now(); rconn_disconnect(rc); } } @@ -989,11 +992,11 @@ disconnect(struct rconn *rc, int error) static void flush_queue(struct rconn *rc) { - if (!rc->txq.n) { + if (list_is_empty(&rc->txq)) { return; } - while (rc->txq.n > 0) { - struct ofpbuf *b = queue_pop_head(&rc->txq); + while (!list_is_empty(&rc->txq)) { + struct ofpbuf *b = ofpbuf_from_list(list_pop_front(&rc->txq)); struct rconn_packet_counter *counter = b->private_p; if (counter) { rconn_packet_counter_dec(counter); @@ -1043,16 +1046,6 @@ state_transition(struct rconn *rc, enum state state) rc->state_entered = time_now(); } -static void -question_connectivity(struct rconn *rc) -{ - time_t now = time_now(); - if (now - rc->last_questioned > 60) { - rc->questionable_connectivity = true; - rc->last_questioned = now; - } -} - static void copy_to_monitor(struct rconn *rc, const struct ofpbuf *b) { @@ -1082,7 +1075,7 @@ copy_to_monitor(struct rconn *rc, const struct ofpbuf *b) } static bool -is_connected_state(enum state state) +is_connected_state(enum state state) { return (state & (S_ACTIVE | S_IDLE)) != 0; } @@ -1104,3 +1097,12 @@ is_admitted_msg(const struct ofpbuf *b) (1u << OFPT_GET_CONFIG_REPLY) | (1u << OFPT_SET_CONFIG))); } + +/* Returns true if 'rc' is currently logging information about connection + * attempts, false if logging should be suppressed because 'rc' hasn't + * successuflly connected in too long. */ +static bool +rconn_logging_connection_attempts__(const struct rconn *rc) +{ + return rc->backoff < rc->max_backoff; +}