hmap_random_node: Improve distribution

[sliver-openvswitch.git] / lib / hmap.c
diff --git a/lib/hmap.c b/lib/hmap.c

index 71943a7..542d8b5 100644 (file)
--- a/lib/hmap.c
+++ b/lib/hmap.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -16,10 +16,19 @@
  
  #include <config.h>
  #include "hmap.h"
-#include <assert.h>
  #include <stdint.h>
+#include <string.h>
  #include "coverage.h"
+#include "random.h"
  #include "util.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(hmap);
+
+COVERAGE_DEFINE(hmap_pathological);
+COVERAGE_DEFINE(hmap_expand);
+COVERAGE_DEFINE(hmap_shrink);
+COVERAGE_DEFINE(hmap_reserve);
  
  /* Initializes 'hmap' as an empty hash table. */
  void
@@ -41,6 +50,22 @@ hmap_destroy(struct hmap *hmap)
      }
  }
  
+/* Removes all node from 'hmap', leaving it ready to accept more nodes.  Does
+ * not free memory allocated for 'hmap'.
+ *
+ * This function is appropriate when 'hmap' will soon have about as many
+ * elements as it before.  If 'hmap' will likely have fewer elements than
+ * before, use hmap_destroy() followed by hmap_clear() to save memory and
+ * iteration time. */
+void
+hmap_clear(struct hmap *hmap)
+{
+    if (hmap->n > 0) {
+        hmap->n = 0;
+        memset(hmap->buckets, 0, (hmap->mask + 1) * sizeof *hmap->buckets);
+    }
+}
+
  /* Exchanges hash maps 'a' and 'b'. */
  void
  hmap_swap(struct hmap *a, struct hmap *b)
@@ -63,13 +88,12 @@ hmap_moved(struct hmap *hmap)
  }
  
  static void
-resize(struct hmap *hmap, size_t new_mask)
+resize(struct hmap *hmap, size_t new_mask, const char *where)
  {
      struct hmap tmp;
      size_t i;
  
-    assert(!(new_mask & (new_mask + 1)));
-    assert(new_mask != SIZE_MAX);
+    ovs_assert(is_pow2(new_mask + 1));
  
      hmap_init(&tmp);
      if (new_mask) {
@@ -88,7 +112,10 @@ resize(struct hmap *hmap, size_t new_mask)
              count++;
          }
          if (count > 5) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
              COVERAGE_INC(hmap_pathological);
+            VLOG_DBG_RL(&rl, "%s: %d nodes in bucket (%"PRIuSIZE" nodes, %"PRIuSIZE" buckets)",
+                        where, count, hmap->n, hmap->mask + 1);
          }
      }
      hmap_swap(hmap, &tmp);
@@ -115,38 +142,50 @@ calc_mask(size_t capacity)
      return mask;
  }
  
-/* Expands 'hmap', if necessary, to optimize the performance of searches. */
+/* Expands 'hmap', if necessary, to optimize the performance of searches.
+ *
+ * ('where' is used in debug logging.  Commonly one would use hmap_expand() to
+ * automatically provide the caller's source file and line number for
+ * 'where'.) */
  void
-hmap_expand(struct hmap *hmap)
+hmap_expand_at(struct hmap *hmap, const char *where)
  {
      size_t new_mask = calc_mask(hmap->n);
      if (new_mask > hmap->mask) {
          COVERAGE_INC(hmap_expand);
-        resize(hmap, new_mask);
+        resize(hmap, new_mask, where);
      }
  }
  
-/* Shrinks 'hmap', if necessary, to optimize the performance of iteration. */
+/* Shrinks 'hmap', if necessary, to optimize the performance of iteration.
+ *
+ * ('where' is used in debug logging.  Commonly one would use hmap_shrink() to
+ * automatically provide the caller's source file and line number for
+ * 'where'.) */
  void
-hmap_shrink(struct hmap *hmap)
+hmap_shrink_at(struct hmap *hmap, const char *where)
  {
      size_t new_mask = calc_mask(hmap->n);
      if (new_mask < hmap->mask) {
          COVERAGE_INC(hmap_shrink);
-        resize(hmap, new_mask);
+        resize(hmap, new_mask, where);
      }
  }
  
  /* Expands 'hmap', if necessary, to optimize the performance of searches when
   * it has up to 'n' elements.  (But iteration will be slow in a hash map whose
- * allocated capacity is much higher than its current number of nodes.)  */
+ * allocated capacity is much higher than its current number of nodes.)
+ *
+ * ('where' is used in debug logging.  Commonly one would use hmap_reserve() to
+ * automatically provide the caller's source file and line number for
+ * 'where'.) */
  void
-hmap_reserve(struct hmap *hmap, size_t n)
+hmap_reserve_at(struct hmap *hmap, size_t n, const char *where)
  {
      size_t new_mask = calc_mask(n);
      if (new_mask > hmap->mask) {
          COVERAGE_INC(hmap_reserve);
-        resize(hmap, new_mask);
+        resize(hmap, new_mask, where);
      }
  }
  
@@ -163,3 +202,93 @@ hmap_node_moved(struct hmap *hmap,
      *bucket = node;
  }
  
+/* Chooses and returns a randomly selected node from 'hmap', which must not be
+ * empty.
+ *
+ * I wouldn't depend on this algorithm to be fair, since I haven't analyzed it.
+ * But it does at least ensure that any node in 'hmap' can be chosen. */
+struct hmap_node *
+hmap_random_node(const struct hmap *hmap)
+{
+    struct hmap_node *bucket, *node;
+    size_t n, i;
+
+    /* Choose a random non-empty bucket. */
+    for (;;) {
+        bucket = hmap->buckets[random_uint32() & hmap->mask];
+        if (bucket) {
+            break;
+        }
+    }
+
+    /* Count nodes in bucket. */
+    n = 0;
+    for (node = bucket; node; node = node->next) {
+        n++;
+    }
+
+    /* Choose random node from bucket. */
+    i = random_range(n);
+    for (node = bucket; i-- > 0; node = node->next) {
+        continue;
+    }
+    return node;
+}
+
+/* Returns the next node in 'hmap' in hash order, or NULL if no nodes remain in
+ * 'hmap'.  Uses '*bucketp' and '*offsetp' to determine where to begin
+ * iteration, and stores new values to pass on the next iteration into them
+ * before returning.
+ *
+ * It's better to use plain HMAP_FOR_EACH and related functions, since they are
+ * faster and better at dealing with hmaps that change during iteration.
+ *
+ * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'.
+ */
+struct hmap_node *
+hmap_at_position(const struct hmap *hmap,
+                 uint32_t *bucketp, uint32_t *offsetp)
+{
+    size_t offset;
+    size_t b_idx;
+
+    offset = *offsetp;
+    for (b_idx = *bucketp; b_idx <= hmap->mask; b_idx++) {
+        struct hmap_node *node;
+        size_t n_idx;
+
+        for (n_idx = 0, node = hmap->buckets[b_idx]; node != NULL;
+             n_idx++, node = node->next) {
+            if (n_idx == offset) {
+                if (node->next) {
+                    *bucketp = node->hash & hmap->mask;
+                    *offsetp = offset + 1;
+                } else {
+                    *bucketp = (node->hash & hmap->mask) + 1;
+                    *offsetp = 0;
+                }
+                return node;
+            }
+        }
+        offset = 0;
+    }
+
+    *bucketp = 0;
+    *offsetp = 0;
+    return NULL;
+}
+
+/* Returns true if 'node' is in 'hmap', false otherwise. */
+bool
+hmap_contains(const struct hmap *hmap, const struct hmap_node *node)
+{
+    struct hmap_node *p;
+
+    for (p = hmap_first_in_bucket(hmap, node->hash); p; p = p->next) {
+        if (p == node) {
+            return true;
+        }
+    }
+
+    return false;
+}