struct timer tx_timer; /* Send CCM when expired. */
struct timer fault_timer; /* Check for faults when expired. */
-
- long long x_recv_time;
};
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); /* Shared by the VLOG_WARN_RL() calls below. */
+
static int
ccm_interval_to_ms(uint8_t interval)
{
NOT_REACHED();
}
+/* Returns the time, in milliseconds, between fault checks for 'cfmi': 3.5
+ * times its CCM transmission interval, truncated to whole milliseconds.
+ * 'cfmi' is only read, so it is taken as a pointer to const. */
+static long long int
+cfm_fault_interval(const struct cfm_internal *cfmi)
+{
+ /* According to the 802.1ag specification we should assume every other MP
+ * with the same MAID has the same transmission interval that we have. If
+ * an MP has a different interval, cfm_process_heartbeat will register it
+ * as a fault (likely due to a configuration error). Thus we can check all
+ * MPs at once making this quite a bit simpler.
+ *
+ * According to the specification we should check when (ccm_interval_ms *
+ * 3.5)ms have passed. */
+ return (cfmi->ccm_interval_ms * 7) / 2;
+}
+
static uint8_t
ms_to_ccm_interval(int interval_ms)
{
}
/* Allocates a 'cfm' object. This object should have its 'mpid', 'maid',
- * 'eth_src', and 'interval' filled out. When changes are made to the 'cfm'
- * object, cfm_configure should be called before using it. */
+ * 'eth_src', and 'interval' filled out. cfm_configure() should be called
+ * whenever changes are made to 'cfm', and before cfm_run() is called for the
+ * first time. */
struct cfm *
cfm_create(void)
{
cfmi = xzalloc(sizeof *cfmi);
cfm = &cfmi->cfm;
- cfmi->x_recv_time = LLONG_MIN;
hmap_init(&cfm->remote_mps);
return cfm;
void
cfm_run(struct cfm *cfm)
{
- long long now = time_msec();
struct cfm_internal *cfmi = cfm_to_internal(cfm);
- long long fault_interval;
- /* According to the 802.1ag specification we should assume every other MP
- * with the same MAID has the same transmission interval that we have. If
- * an MP has a different interval, cfm_process_heartbeat will register it
- * as a fault (likely due to a configuration error). Thus we can check all
- * MPs at once making this quite a bit simpler.
- *
- * According to the specification we should check when (ccm_interval_ms *
- * 3.5)ms have passed. */
- fault_interval = (cfmi->ccm_interval_ms * 7) / 2;
if (timer_expired(&cfmi->fault_timer)) {
- bool fault;
+ long long int interval = cfm_fault_interval(cfmi);
struct remote_mp *rmp;
- fault = now < cfmi->x_recv_time + fault_interval;
-
+ cfm->fault = false; /* Recomputed below: set if any remote MP faulted. */
HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
- if (timer_expired_at(&cfmi->fault_timer, rmp->recv_time)) {
- rmp->fault = true;
- }
+ rmp->fault = !rmp->recv; /* 'recv' not set since the previous check. */
+ rmp->recv = false; /* Reset so the next check sees only new CCMs. */
if (rmp->fault) {
- fault = true;
+ cfm->fault = true;
+ VLOG_DBG("No CCM from RMP %"PRIu16" in the last %lldms",
+ rmp->mpid, interval);
}
}
- cfm->fault = fault;
- timer_set_duration(&cfmi->fault_timer, fault_interval);
+ if (!cfm->fault) {
+ VLOG_DBG("All RMPs received CCMs in the last %lldms", interval);
+ }
+
+ timer_set_duration(&cfmi->fault_timer, interval); /* Arm the next check. */
}
}
cfmi->ccm_interval = interval;
cfmi->ccm_interval_ms = ccm_interval_to_ms(interval);
- /* Force a resend and check in case anything changed. */
timer_set_expired(&cfmi->tx_timer);
- timer_set_expired(&cfmi->fault_timer);
+ timer_set_duration(&cfmi->fault_timer, cfm_fault_interval(cfmi));
}
return true;
uint8_t ccm_interval;
struct remote_mp *rmp;
struct eth_header *eth;
-
- struct cfm_internal *cfmi = cfm_to_internal(cfm);
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+ struct cfm_internal *cfmi = cfm_to_internal(cfm);
eth = p->l2;
ccm = ofpbuf_at(p, (uint8_t *)p->l3 - (uint8_t *)p->data, CCM_LEN);
return;
}
+ /* According to the 802.1ag specification, reception of a CCM with an
+ * incorrect ccm_interval, unexpected MAID, or unexpected MPID should
+ * trigger a fault. We ignore this requirement for several reasons.
+ *
+ * Faults can cause a controller or Open vSwitch to make potentially
+ * expensive changes to the network topology. It seems prudent to trigger
+ * them judiciously, especially when CFM is used to check slave status of
+ * bonds. Furthermore, faults can be maliciously triggered by crafting
+ * invalid CCMs. */
if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
- cfmi->x_recv_time = time_msec();
- cfm->fault = true;
VLOG_WARN_RL(&rl, "Received unexpected remote MAID from MAC "
ETH_ADDR_FMT, ETH_ADDR_ARGS(eth->eth_src));
} else {
rmp = lookup_remote_mp(&cfm->remote_mps, ccm_mpid);
if (rmp) {
- rmp->recv_time = time_msec();
- rmp->fault = ccm_interval != cfmi->ccm_interval;
- cfm->fault = rmp->fault || cfm->fault;
+ rmp->recv = true;
+
+ if (ccm_interval != cfmi->ccm_interval) {
+ VLOG_WARN_RL(&rl, "received a CCM with an invalid interval"
+ " (%"PRIu8") from RMP %"PRIu16, ccm_interval,
+ rmp->mpid);
+ }
} else {
- cfmi->x_recv_time = time_msec();
- cfm->fault = true;
VLOG_WARN_RL(&rl, "Received unexpected remote MPID %d from MAC "
ETH_ADDR_FMT, ccm_mpid, ETH_ADDR_ARGS(eth->eth_src));
}
+
+ VLOG_DBG("Received CCM (mpid %"PRIu16") (interval %"PRIu8")", ccm_mpid,
+ ccm_interval);
}
}
cfm_dump_ds(const struct cfm *cfm, struct ds *ds)
{
const struct cfm_internal *cfmi = cfm_to_internal(cfm);
- long long int now = time_msec();
struct remote_mp *rmp;
ds_put_format(ds, "MPID %"PRIu16": %s\n", cfm->mpid,
ds_put_format(ds, "\tnext fault check: %lldms\n",
timer_msecs_until_expired(&cfmi->fault_timer));
- if (cfmi->x_recv_time != LLONG_MIN) {
- ds_put_format(ds, "\ttime since bad CCM rx: %lldms\n",
- now - cfmi->x_recv_time);
- }
-
ds_put_cstr(ds, "\n");
HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
ds_put_format(ds, "Remote MPID %"PRIu16": %s\n", rmp->mpid,
rmp->fault ? "fault" : "");
- ds_put_format(ds, "\ttime since CCM rx: %lldms\n",
- time_msec() - rmp->recv_time);
+ /* End the line so the next remote MP's entry starts on its own line. */
+ ds_put_format(ds, "\trecv since check: %s\n",
+ rmp->recv ? "true" : "false");
}
}