From: Ethan Jackson
Date: Fri, 1 Apr 2011 20:10:49 +0000 (-0700)
Subject: cfm: Allow time for CCM reception after cfm_configure();
X-Git-Tag: v1.1.0~7
X-Git-Url: http://git.onelab.eu/?p=sliver-openvswitch.git;a=commitdiff_plain;h=aac19178a43f5ed3367e2d7a7a19ae34fdf56cfb

cfm: Allow time for CCM reception after cfm_configure();

Before this (and the previous) patch, whenever cfm_configure was called it
would set the fault_timer to expired. Thus, the next call to cfm_run would
notice a lack of CCM reception and trigger a faulted status. This is a bug in
and of itself, but normally would not be a big deal because cfm_configure
should only be called infrequently (when the database changes). However due
to an unrelated bug, cfm_configure() was getting called approximately once per
second. This resulted in all monitors showing faults all of the time.

This patch fixes the problem by not expiring the timer at cfm_configure().
Instead it gives it the appropriate fault_interval amount of time to miss
heartbeats.

Bug #5244.
---

diff --git a/lib/cfm.c b/lib/cfm.c
index f9b348863..5a547d0d8 100644
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -67,6 +67,20 @@ ccm_interval_to_ms(uint8_t interval)
     NOT_REACHED();
 }
 
+static long long int
+cfm_fault_interval(struct cfm_internal *cfmi)
+{
+    /* According to the 802.1ag specification we should assume every other MP
+     * with the same MAID has the same transmission interval that we have. If
+     * an MP has a different interval, cfm_process_heartbeat will register it
+     * as a fault (likely due to a configuration error). Thus we can check all
+     * MPs at once making this quite a bit simpler.
+     *
+     * According to the specification we should check when (ccm_interval_ms *
+     * 3.5)ms have passed. */
+    return (cfmi->ccm_interval_ms * 7) / 2;
+}
+
 static uint8_t
 ms_to_ccm_interval(int interval_ms)
 {
@@ -156,22 +170,12 @@ cfm_run(struct cfm *cfm)
 {
     long long now = time_msec();
     struct cfm_internal *cfmi = cfm_to_internal(cfm);
-    long long fault_interval;
 
-    /* According to the 802.1ag specification we should assume every other MP
-     * with the same MAID has the same transmission interval that we have. If
-     * an MP has a different interval, cfm_process_heartbeat will register it
-     * as a fault (likely due to a configuration error). Thus we can check all
-     * MPs at once making this quite a bit simpler.
-     *
-     * According to the specification we should check when (ccm_interval_ms *
-     * 3.5)ms have passed. */
-    fault_interval = (cfmi->ccm_interval_ms * 7) / 2;
     if (timer_expired(&cfmi->fault_timer)) {
         bool fault;
         struct remote_mp *rmp;
 
-        fault = now < cfmi->x_recv_time + fault_interval;
+        fault = now < cfmi->x_recv_time + cfm_fault_interval(cfmi);
 
         HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
             if (timer_expired_at(&cfmi->fault_timer, rmp->recv_time)) {
@@ -184,7 +188,7 @@ cfm_run(struct cfm *cfm)
         }
 
         cfm->fault = fault;
-        timer_set_duration(&cfmi->fault_timer, fault_interval);
+        timer_set_duration(&cfmi->fault_timer, cfm_fault_interval(cfmi));
     }
 }
 
@@ -243,9 +247,8 @@ cfm_configure(struct cfm *cfm)
         cfmi->ccm_interval = interval;
         cfmi->ccm_interval_ms = ccm_interval_to_ms(interval);
 
-        /* Force a resend and check in case anything changed. */
         timer_set_expired(&cfmi->tx_timer);
-        timer_set_expired(&cfmi->fault_timer);
+        timer_set_duration(&cfmi->fault_timer, cfm_fault_interval(cfmi));
     }
 
     return true;