Consistently write null pointer constants as NULL instead of 0.

[sliver-openvswitch.git] / lib / cfm.c
diff --git a/lib/cfm.c b/lib/cfm.c

index 1626514..a605c09 100644 (file)
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -21,12 +21,14 @@
  #include <stdlib.h>
  #include <string.h>
  
+#include "dynamic-string.h"
  #include "flow.h"
  #include "hash.h"
  #include "hmap.h"
  #include "ofpbuf.h"
  #include "packets.h"
  #include "poll-loop.h"
+#include "timer.h"
  #include "timeval.h"
  #include "vlog.h"
  
@@ -41,10 +43,12 @@ struct cfm_internal {
      uint8_t ccm_interval;  /* The CCM transmission interval. */
      int ccm_interval_ms;   /* 'ccm_interval' in milliseconds. */
  
-    long long ccm_sent;    /* The time we last sent a CCM. */
-    long long fault_check; /* The time we last checked for faults. */
+    struct timer tx_timer;    /* Send CCM when expired. */
+    struct timer fault_timer; /* Check for faults when expired. */
  };
  
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+
  static int
  ccm_interval_to_ms(uint8_t interval)
  {
@@ -63,6 +67,20 @@ ccm_interval_to_ms(uint8_t interval)
      NOT_REACHED();
  }
  
+static long long int
+cfm_fault_interval(const struct cfm_internal *cfmi)
+{
+    /* Returns the fault check period in milliseconds.
+     *
+     * According to the 802.1ag specification we should assume every other MP
+     * with the same MAID has the same transmission interval that we have, so
+     * all MPs can be checked at once.  A CCM carrying a different interval
+     * is only logged by cfm_process_heartbeat(), not treated as a fault.
+     *
+     * The specification calls for a check every (ccm_interval_ms * 3.5)ms. */
+    return (cfmi->ccm_interval_ms * 7) / 2;
+}
+
  static uint8_t
  ms_to_ccm_interval(int interval_ms)
  {
@@ -78,7 +96,7 @@ ms_to_ccm_interval(int interval_ms)
  }
  
  static struct cfm_internal *
-cfm_to_internal(struct cfm *cfm)
+cfm_to_internal(const struct cfm *cfm)
  {
      return CONTAINER_OF(cfm, struct cfm_internal, cfm);
  }
@@ -111,8 +129,9 @@ lookup_remote_mp(const struct hmap *hmap, uint16_t mpid)
  }
  
  /* Allocates a 'cfm' object.  This object should have its 'mpid', 'maid',
- * 'eth_src', and 'interval' filled out.  When changes are made to the 'cfm'
- * object, cfm_configure should be called before using it. */
+ * 'eth_src', and 'interval' filled out.  cfm_configure() should be called
+ * whenever changes are made to 'cfm', and before cfm_run() is called for the
+ * first time. */
  struct cfm *
  cfm_create(void)
  {
@@ -123,16 +142,14 @@ cfm_create(void)
      cfm  = &cfmi->cfm;
  
      hmap_init(&cfm->remote_mps);
-    hmap_init(&cfm->x_remote_mps);
-    hmap_init(&cfm->x_remote_maids);
      return cfm;
  }
  
  void
  cfm_destroy(struct cfm *cfm)
  {
+    struct cfm_internal *cfmi = cfm_to_internal(cfm);
      struct remote_mp *rmp, *rmp_next;
-    struct remote_maid *rmaid, *rmaid_next;
  
      if (!cfm) {
          return;
@@ -143,69 +160,37 @@ cfm_destroy(struct cfm *cfm)
          free(rmp);
      }
  
-    HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->x_remote_mps) {
-        hmap_remove(&cfm->x_remote_mps, &rmp->node);
-        free(rmp);
-    }
-
-    HMAP_FOR_EACH_SAFE (rmaid, rmaid_next, node, &cfm->x_remote_maids) {
-        hmap_remove(&cfm->x_remote_maids, &rmaid->node);
-        free(rmaid);
-    }
-
      hmap_destroy(&cfm->remote_mps);
-    hmap_destroy(&cfm->x_remote_mps);
-    hmap_destroy(&cfm->x_remote_maids);
-    free(cfm_to_internal(cfm));
+    free(cfmi);
  }
  
  /* Should be run periodically to update fault statistics messages. */
  void
  cfm_run(struct cfm *cfm)
  {
-    long long now = time_msec();
      struct cfm_internal *cfmi = cfm_to_internal(cfm);
  
-    /* According to the 802.1ag specification we should assume every other MP
-     * with the same MAID has the same transmission interval that we have.  If
-     * an MP has a different interval, cfm_process_heartbeat will register it
-     * as a fault (likely due to a configuration error).  Thus we can check all
-     * MPs at once making this quite a bit simpler.
-     *
-     * According to the specification we should check when (ccm_interval_ms *
-     * 3.5)ms have passed.  We changed the multiplier to 4 to avoid messy
-     * floating point arithmetic and add a bit of wiggle room. */
-    if (now >= cfmi->fault_check + cfmi->ccm_interval_ms * 4) {
-        bool fault;
-        struct remote_mp *rmp, *rmp_next;
-        struct remote_maid *rmaid, *rmaid_next;
-
-        fault = false;
+    if (timer_expired(&cfmi->fault_timer)) {
+        long long int interval = cfm_fault_interval(cfmi);
+        struct remote_mp *rmp;
  
+        cfm->fault = false;  /* Recomputed from the RMPs below. */
          HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
-            rmp->fault = rmp->fault || cfmi->fault_check > rmp->recv_time;
-            fault      = rmp->fault || fault;
-        }
+            rmp->fault = !rmp->recv;  /* No CCM flagged since last check. */
+            rmp->recv = false;        /* Cleared until the next CCM arrives. */
  
-        HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->x_remote_mps) {
-            if (cfmi->fault_check > rmp->recv_time) {
-                hmap_remove(&cfm->x_remote_mps, &rmp->node);
-                free(rmp);
+            if (rmp->fault) {
+                cfm->fault = true;  /* Any faulted RMP faults the whole MP. */
+                VLOG_DBG("No CCM from RMP %"PRIu16" in the last %lldms",
+                         rmp->mpid, interval);
              }
          }
  
-        HMAP_FOR_EACH_SAFE (rmaid, rmaid_next, node, &cfm->x_remote_maids) {
-            if (cfmi->fault_check > rmaid->recv_time) {
-                hmap_remove(&cfm->x_remote_maids, &rmaid->node);
-                free(rmaid);
-            }
+        if (!cfm->fault) {
+            VLOG_DBG("All RMPs received CCMs in the last %lldms", interval);
          }
  
-        fault = (fault || !hmap_is_empty(&cfm->x_remote_mps)
-                 || !hmap_is_empty(&cfm->x_remote_maids));
-
-        cfm->fault        = fault;
-        cfmi->fault_check = now;
+        timer_set_duration(&cfmi->fault_timer, interval);  /* Next check. */
      }
  }
  
@@ -216,7 +201,7 @@ cfm_should_send_ccm(struct cfm *cfm)
  {
      struct cfm_internal *cfmi = cfm_to_internal(cfm);
  
-    return time_msec() >= cfmi->ccm_sent + cfmi->ccm_interval_ms;
+    return timer_expired(&cfmi->tx_timer);
  }
  
  /* Composes a CCM message into 'ccm'.  Messages generated with this function
@@ -226,7 +211,7 @@ cfm_compose_ccm(struct cfm *cfm, struct ccm *ccm)
  {
      struct cfm_internal *cfmi = cfm_to_internal(cfm);
  
-    cfmi->ccm_sent = time_msec();
+    timer_set_duration(&cfmi->tx_timer, cfmi->ccm_interval_ms);
  
      ccm->mdlevel_version = 0;
      ccm->opcode = CCM_OPCODE;
@@ -240,12 +225,10 @@ cfm_compose_ccm(struct cfm *cfm, struct ccm *ccm)
  void
  cfm_wait(struct cfm *cfm)
  {
-    long long wait;
      struct cfm_internal *cfmi = cfm_to_internal(cfm);
  
-    wait = MIN(cfmi->ccm_sent + cfmi->ccm_interval_ms,
-               cfmi->fault_check + cfmi->ccm_interval_ms * 4);
-    poll_timer_wait_until(wait);
+    timer_wait(&cfmi->tx_timer);
+    timer_wait(&cfmi->fault_timer);
  }
  
  /* Should be called whenever a client of the cfm library changes the internals
@@ -253,19 +236,23 @@ cfm_wait(struct cfm *cfm)
  bool
  cfm_configure(struct cfm *cfm)
  {
-    struct cfm_internal *cfmi;
+    struct cfm_internal *cfmi = cfm_to_internal(cfm);
+    uint8_t interval;
  
      if (!cfm_is_valid_mpid(cfm->mpid) || !cfm->interval) {
          return false;
      }
  
-    cfmi                  = cfm_to_internal(cfm);
-    cfmi->ccm_interval    = ms_to_ccm_interval(cfm->interval);
-    cfmi->ccm_interval_ms = ccm_interval_to_ms(cfmi->ccm_interval);
+    interval = ms_to_ccm_interval(cfm->interval);
+
+    if (interval != cfmi->ccm_interval) {  /* Leave timers alone otherwise. */
+        cfmi->ccm_interval = interval;
+        cfmi->ccm_interval_ms = ccm_interval_to_ms(interval);
+
+        timer_set_expired(&cfmi->tx_timer);  /* cfm_should_send_ccm(): true. */
+        timer_set_duration(&cfmi->fault_timer, cfm_fault_interval(cfmi));
+    }
  
-    /* Force a resend and check in case anything changed. */
-    cfmi->ccm_sent    = 0;
-    cfmi->fault_check = 0;
      return true;
  }
  
@@ -290,8 +277,6 @@ cfm_update_remote_mps(struct cfm *cfm, const uint16_t *mpids, size_t n_mpids)
  
          if ((rmp = lookup_remote_mp(&cfm->remote_mps, mpid))) {
              hmap_remove(&cfm->remote_mps, &rmp->node);
-        } else if ((rmp = lookup_remote_mp(&cfm->x_remote_mps, mpid))) {
-            hmap_remove(&cfm->x_remote_mps, &rmp->node);
          } else {
              rmp = xzalloc(sizeof *rmp);
              rmp->mpid = mpid;
@@ -371,13 +356,12 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
  {
      struct ccm *ccm;
      uint16_t ccm_mpid;
-    uint32_t ccm_seq;
      uint8_t ccm_interval;
      struct remote_mp *rmp;
+    struct eth_header *eth;
+    struct cfm_internal *cfmi = cfm_to_internal(cfm);
  
-    struct cfm_internal *cfmi        = cfm_to_internal(cfm);
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-
+    eth = p->l2;
      ccm = ofpbuf_at(p, (uint8_t *)p->l3 - (uint8_t *)p->data, CCM_LEN);
  
      if (!ccm) {
@@ -391,43 +375,65 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
          return;
      }
  
+    /* According to the 802.1ag specification, reception of a CCM with an
+     * incorrect ccm_interval, unexpected MAID, or unexpected MPID should
+     * trigger a fault.  We ignore this requirement for several reasons.
+     *
+     * Faults can cause a controller or Open vSwitch to make potentially
+     * expensive changes to the network topology.  It seems prudent to trigger
+     * them judiciously, especially when CFM is used to check slave status of
+     * bonds. Furthermore, faults can be maliciously triggered by crafting
+     * invalid CCMs. */
      if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
-        uint32_t hash;
-        struct remote_maid *rmaid;
+        VLOG_WARN_RL(&rl, "Received unexpected remote MAID from MAC "
+                     ETH_ADDR_FMT, ETH_ADDR_ARGS(eth->eth_src));
+    } else {
+        ccm_mpid = ntohs(ccm->mpid);
+        ccm_interval = ccm->flags & 0x7;
  
-        hash = hash_bytes(ccm->maid, sizeof ccm->maid, 0);
+        rmp = lookup_remote_mp(&cfm->remote_mps, ccm_mpid);
  
-        HMAP_FOR_EACH_IN_BUCKET (rmaid, node, hash, &cfm->x_remote_maids) {
-            if (memcmp(rmaid->maid, ccm->maid, sizeof rmaid->maid) == 0) {
-                rmaid->recv_time = time_msec();
-                return;
+        if (rmp) {
+            rmp->recv = true;
+
+            if (ccm_interval != cfmi->ccm_interval) {
+                VLOG_WARN_RL(&rl, "received a CCM with an invalid interval"
+                             " (%"PRIu8") from RMP %"PRIu16, ccm_interval,
+                             rmp->mpid);
              }
+        } else {
+            VLOG_WARN_RL(&rl, "Received unexpected remote MPID %d from MAC "
+                         ETH_ADDR_FMT, ccm_mpid, ETH_ADDR_ARGS(eth->eth_src));
          }
  
-        rmaid            = xzalloc(sizeof *rmaid);
-        rmaid->recv_time = time_msec();
-        memcpy(rmaid->maid, ccm->maid, sizeof rmaid->maid);
-        hmap_insert(&cfm->x_remote_maids, &rmaid->node, hash);
-        return;
+        VLOG_DBG("Received CCM (mpid %"PRIu16") (interval %"PRIu8")", ccm_mpid,
+                 ccm_interval);
      }
+}
  
-    ccm_mpid     = ntohs(ccm->mpid);
-    ccm_seq      = ntohl(ccm->seq);
-    ccm_interval = ccm->flags & 0x7;
-
-    rmp = lookup_remote_mp(&cfm->remote_mps, ccm_mpid);
-
-    if (!rmp) {
-        rmp = lookup_remote_mp(&cfm->x_remote_mps, ccm_mpid);
-    }
+/* Appends to 'ds' a human-readable summary of 'cfm': its MPID, fault status,
+ * CCM interval, and time left on the transmit and fault-check timers, followed
+ * by the fault and receive status of each remote MP, one field per line. */
+void
+cfm_dump_ds(const struct cfm *cfm, struct ds *ds)
+{
+    const struct cfm_internal *cfmi = cfm_to_internal(cfm);
+    struct remote_mp *rmp;
  
-    if (!rmp) {
-        rmp       = xzalloc(sizeof *rmp);
-        rmp->mpid = ccm_mpid;
-        hmap_insert(&cfm->x_remote_mps, &rmp->node, hash_mpid(ccm_mpid));
-        cfm->fault = true;
+    ds_put_format(ds, "MPID %"PRIu16": %s\n", cfm->mpid,
+                  cfm->fault ? "fault" : "");
+
+    ds_put_format(ds, "\tinterval: %dms\n", cfmi->ccm_interval_ms);
+    ds_put_format(ds, "\tnext CCM tx: %lldms\n",
+                  timer_msecs_until_expired(&cfmi->tx_timer));
+    ds_put_format(ds, "\tnext fault check: %lldms\n",
+                  timer_msecs_until_expired(&cfmi->fault_timer));
+
+    ds_put_cstr(ds, "\n");
+    HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) {
+        ds_put_format(ds, "Remote MPID %"PRIu16": %s\n", rmp->mpid,
+                      rmp->fault ? "fault" : "");
+        ds_put_format(ds, "\trecv since check: %s\n",
+                      rmp->recv ? "true" : "false");
      }
-
-    rmp->recv_time = time_msec();
-    rmp->fault     = ccm_interval != cfmi->ccm_interval;
  }