/*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011 Nicira Networks.
* Distributed under the terms of the GNU GPL version 2.
*
* Significant portions of this file may be copied from parts of the Linux
#include "flow.h"
#include "datapath.h"
+#include "table.h"
+#include <linux/genetlink.h>
#include <linux/gfp.h>
-#include <linux/jhash.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/highmem.h>
#include <asm/pgtable.h>
-static void free_table(struct sw_flow ***flows, unsigned int n_buckets,
- int free_flows)
+/**
+ * struct tbl_bucket - single bucket within a hash table
+ * @rcu: RCU callback structure
+ * @n_objs: number of objects in @objs[] array
+ * @objs: array of @n_objs pointers to table nodes contained inside objects
+ *
+ * The expected number of objects per bucket is 1, but this allows for an
+ * arbitrary number of collisions.
+ */
+struct tbl_bucket {
+ struct rcu_head rcu;
+ unsigned int n_objs;
+ /* C99 flexible array member; storage is sized by bucket_alloc(n_objs). */
+ struct tbl_node *objs[];
+};
+
+/* Reader-side bucket dereference: valid under either the RCU read lock or
+ * the genetlink mutex, as expressed by the lockdep conditions below. */
+static struct tbl_bucket *get_bucket(struct tbl_bucket __rcu *bucket)
+{
+ return rcu_dereference_check(bucket, rcu_read_lock_held() ||
+ lockdep_genl_is_held());
+}
+
+/* Update-side bucket dereference: the caller must hold the genetlink
+ * mutex, which serializes all writers to the table. */
+static struct tbl_bucket *get_bucket_protected(struct tbl_bucket __rcu *bucket)
+{
+ return rcu_dereference_protected(bucket, lockdep_genl_is_held());
+}
+
+/* Bytes needed for a bucket holding @n_objs object pointers: the fixed
+ * header plus the flexible objs[] array. */
+static inline int bucket_size(int n_objs)
+{
+ return sizeof(struct tbl_bucket) + sizeof(struct tbl_node *) * n_objs;
+}
+
+/* Allocates a bucket with room for @n_objs object pointers.  Uses
+ * GFP_KERNEL, so it may sleep; callers must be in process context. */
+static struct tbl_bucket *bucket_alloc(int n_objs)
+{
+ return kmalloc(bucket_size(n_objs), GFP_KERNEL);
+}
+
+/* Frees the two-level bucket array @l1 covering @n_buckets buckets.  If
+ * @free_obj is nonnull it is called on every contained object first.
+ * NOTE(review): the __force cast strips the RCU annotation, so this must
+ * only run once no RCU readers can still reach these buckets — confirm at
+ * call sites. */
+static void free_buckets(struct tbl_bucket __rcu ***l1,
+ unsigned int n_buckets,
+ void (*free_obj)(struct tbl_node *))
{
unsigned int i;
- for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- struct sw_flow **l2 = flows[i];
- if (free_flows) {
- unsigned int j;
- for (j = 0; j < DP_L1_SIZE; j++) {
- if (l2[j])
- flow_free(l2[j]);
+ for (i = 0; i < n_buckets >> TBL_L1_SHIFT; i++) {
+ struct tbl_bucket __rcu **l2 = l1[i];
+ unsigned int j;
+
+ for (j = 0; j < TBL_L2_SIZE; j++) {
+ struct tbl_bucket *bucket = (struct tbl_bucket __force *)l2[j];
+ if (!bucket)
+ continue;
+
+ if (free_obj) {
+ unsigned int k;
+ for (k = 0; k < bucket->n_objs; k++)
+ free_obj(bucket->objs[k]);
}
+ kfree(bucket);
}
free_page((unsigned long)l2);
}
- kfree(flows);
+ kfree(l1);
}
-static struct sw_flow ***alloc_table(unsigned int n_buckets)
+/* Allocates the two-level array for @n_buckets buckets: one L1 pointer
+ * array plus one zeroed page of L2 slots per TBL_L2_SIZE buckets.  Returns
+ * NULL on failure; any partial allocation is freed before returning. */
+static struct tbl_bucket __rcu ***alloc_buckets(unsigned int n_buckets)
{
- struct sw_flow ***flows;
+ struct tbl_bucket __rcu ***l1;
unsigned int i;
- flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**),
- GFP_KERNEL);
- if (!flows)
+ l1 = kmalloc((n_buckets >> TBL_L1_SHIFT) * sizeof(struct tbl_bucket **),
+ GFP_KERNEL);
+ if (!l1)
return NULL;
- for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL);
- if (!flows[i]) {
- free_table(flows, i << DP_L1_BITS, 0);
+ for (i = 0; i < n_buckets >> TBL_L1_SHIFT; i++) {
+ l1[i] = (struct tbl_bucket __rcu **)get_zeroed_page(GFP_KERNEL);
+ if (!l1[i]) {
+ free_buckets(l1, i << TBL_L1_SHIFT, NULL);
return NULL;
}
}
- return flows;
+ return l1;
}
-struct dp_table *dp_table_create(unsigned int n_buckets)
+/**
+ * tbl_create - create and return a new hash table
+ * @n_buckets: number of buckets in the new table
+ *
+ * Creates and returns a new hash table, or %NULL if memory cannot be
+ * allocated. @n_buckets must be a power of 2 in the range %TBL_MIN_BUCKETS to
+ * %TBL_MAX_BUCKETS.
+ */
+struct tbl *tbl_create(unsigned int n_buckets)
{
- struct dp_table *table;
+ struct tbl *table;
- table = kzalloc(sizeof *table, GFP_KERNEL);
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
+ /* kzalloc zeroed the struct, so table->count starts at 0. */
table->n_buckets = n_buckets;
- table->flows[0] = alloc_table(n_buckets);
- if (!table[0].flows)
- goto err_free_tables;
-
- table->flows[1] = alloc_table(n_buckets);
- if (!table->flows[1])
- goto err_free_flows0;
+ table->buckets = alloc_buckets(n_buckets);
+ if (!table->buckets)
+ goto err_free_table;
return table;
-err_free_flows0:
- free_table(table->flows[0], table->n_buckets, 0);
-err_free_tables:
+err_free_table:
kfree(table);
err:
return NULL;
}
-void dp_table_destroy(struct dp_table *table, int free_flows)
+/**
+ * tbl_destroy - destroy hash table and optionally the objects it contains
+ * @table: table to destroy
+ * @destructor: function to be called on objects at destruction time
+ *
+ * If a destructor is null, then the buckets in @table are destroyed
+ * but not the objects within those buckets. This behavior is useful when a
+ * table is being replaced by a larger or smaller one without destroying the
+ * objects.
+ *
+ * If a destructor is not null, then it is called on the objects in @table
+ * before destroying the buckets.
+ */
+void tbl_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
{
- int i;
- for (i = 0; i < 2; i++)
- free_table(table->flows[i], table->n_buckets, free_flows);
+ if (!table)
+ return;
+
+ /* NOTE(review): frees buckets immediately; use tbl_deferred_destroy()
+ * instead if RCU readers may still hold references to @table. */
+ free_buckets(table->buckets, table->n_buckets, destructor);
kfree(table);
}
-static struct sw_flow **find_bucket(struct dp_table *table,
- struct sw_flow ***flows, u32 hash)
+/* RCU callback: destroys @table using the destructor that
+ * tbl_deferred_destroy() stashed in table->obj_destructor. */
+static void destroy_table_rcu(struct rcu_head *rcu)
{
- unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
- unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
- return &flows[l1][l2];
+ struct tbl *table = container_of(rcu, struct tbl, rcu);
+ tbl_destroy(table, table->obj_destructor);
}
-static struct sw_flow *lookup_table(struct dp_table *table,
- struct sw_flow ***flows, u32 hash,
- const struct odp_flow_key *key)
+/**
+ * tbl_deferred_destroy - destroy table after a RCU grace period
+ * @table: table to destroy
+ * @destructor: function to be called on objects at destruction time
+ *
+ * Calls tbl_destroy() on @table after an RCU grace period. If @destructor is
+ * not null it is called on every element before the table is destroyed. */
+void tbl_deferred_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
{
- struct sw_flow **bucket = find_bucket(table, flows, hash);
- struct sw_flow *flow = rcu_dereference(*bucket);
- if (flow && !memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
- return flow;
- return NULL;
-}
+ if (!table)
+ return;
-static u32 flow_hash0(const struct odp_flow_key *key)
-{
- return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa);
+ /* Stash the destructor where destroy_table_rcu() can find it after
+ * the grace period elapses. */
+ table->obj_destructor = destructor;
+ call_rcu(&table->rcu, destroy_table_rcu);
}
-static u32 flow_hash1(const struct odp_flow_key *key)
+/* Maps @hash to its bucket slot: the low log2(n_buckets) bits select the
+ * bucket, split into an L1 page index and an L2 index within the page. */
+static struct tbl_bucket __rcu **find_bucket(struct tbl *table, u32 hash)
{
- return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555);
+ unsigned int l1 = (hash & (table->n_buckets - 1)) >> TBL_L1_SHIFT;
+ unsigned int l2 = hash & ((1 << TBL_L2_BITS) - 1);
+ return &table->buckets[l1][l2];
}
-static void find_buckets(struct dp_table *table,
- const struct odp_flow_key *key,
- struct sw_flow **buckets[2])
+/* Returns the index in bucket->objs[] of the object matching @target, or
+ * -1 if none matches.  The cached hash is compared first as a cheap filter
+ * before calling the caller-supplied @cmp. */
+static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash,
+ int (*cmp)(const struct tbl_node *, void *))
{
- buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key));
- buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key));
+ int i;
+
+ for (i = 0; i < bucket->n_objs; i++) {
+ struct tbl_node *obj = bucket->objs[i];
+ if (obj->hash == hash && likely(cmp(obj, target)))
+ return i;
+ }
+
+ return -1;
}
-struct sw_flow *dp_table_lookup(struct dp_table *table,
- const struct odp_flow_key *key)
+/**
+ * tbl_lookup - searches hash table for a matching object
+ * @table: hash table to search
+ * @target: identifier for the object that is being searched for, will be
+ * provided as an argument to @cmp when making comparisons
+ * @hash: hash of @target
+ * @cmp: comparison function to match objects with the given hash, returns
+ * nonzero if the objects match, zero otherwise
+ *
+ * Searches @table for an object identified by @target. Returns the tbl_node
+ * contained in the object if successful, otherwise %NULL.
+ */
+struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash,
+ int (*cmp)(const struct tbl_node *, void *))
{
- struct sw_flow *flow;
- flow = lookup_table(table, table->flows[0], flow_hash0(key), key);
- if (!flow)
- flow = lookup_table(table, table->flows[1],
- flow_hash1(key), key);
- return flow;
+ struct tbl_bucket __rcu **bucketp = find_bucket(table, hash);
+ /* Reader-side dereference: RCU read lock or genl mutex suffices. */
+ struct tbl_bucket *bucket = get_bucket(*bucketp);
+ int index;
+
+ if (!bucket)
+ return NULL;
+
+ index = search_bucket(bucket, target, hash, cmp);
+ if (index < 0)
+ return NULL;
+
+ return bucket->objs[index];
}
-int dp_table_foreach(struct dp_table *table,
- int (*callback)(struct sw_flow *flow, void *aux),
- void *aux)
+/**
+ * tbl_foreach - iterate through hash table
+ * @table: table to iterate
+ * @callback: function to call for each entry
+ * @aux: Extra data to pass to @callback
+ *
+ * Iterates through all of the objects in @table in hash order, passing each of
+ * them in turn to @callback. If @callback returns nonzero, this terminates
+ * the iteration and tbl_foreach() returns the same value. Returns 0 if
+ * @callback never returns nonzero.
+ *
+ * This function does not try to intelligently handle the case where @callback
+ * adds or removes flows in @table.
+ */
+int tbl_foreach(struct tbl *table,
+ int (*callback)(struct tbl_node *, void *aux), void *aux)
{
- unsigned int i, j, k;
- for (i = 0; i < 2; i++) {
- for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) {
- struct sw_flow **l2 = table->flows[i][j];
- for (k = 0; k < DP_L1_SIZE; k++) {
- struct sw_flow *flow = rcu_dereference(l2[k]);
- if (flow) {
- int error = callback(flow, aux);
- if (error)
- return error;
- }
+ unsigned int n_l1 = table->n_buckets >> TBL_L1_SHIFT;
+ unsigned int l1_idx;
+
+ for (l1_idx = 0; l1_idx < n_l1; l1_idx++) {
+ struct tbl_bucket __rcu **l2 = table->buckets[l1_idx];
+ unsigned int l2_idx;
+
+ for (l2_idx = 0; l2_idx < TBL_L2_SIZE; l2_idx++) {
+ struct tbl_bucket *bucket;
+ unsigned int i;
+
+ bucket = get_bucket(l2[l2_idx]);
+ if (!bucket)
+ continue;
+
+ for (i = 0; i < bucket->n_objs; i++) {
+ int error = (*callback)(bucket->objs[i], aux);
+ if (error)
+ return error;
}
}
}
return 0;
}
-static int insert_flow(struct sw_flow *flow, void *new_table_)
+/**
+ * tbl_next - find next node in hash table
+ * @table: table to iterate
+ * @bucketp: On entry, hash value of bucket to start from. On exit, updated
+ * to bucket to start from on next call.
+ * @objp: On entry, index to start from within first bucket. On exit, updated
+ * to index to start from on next call.
+ *
+ * Returns the next node in @table in hash order, or %NULL when no nodes remain
+ * in the hash table.
+ *
+ * On entry, uses the values that @bucketp and @objp reference to determine
+ * where to begin iteration. Use 0 for both values to begin a new iteration.
+ * On exit, stores the values to pass on the next iteration into @bucketp and
+ * @objp's referents.
+ */
+struct tbl_node *tbl_next(struct tbl *table, u32 *bucketp, u32 *objp)
{
- struct dp_table *new_table = new_table_;
- struct sw_flow **buckets[2];
- int i;
+ unsigned int n_l1 = table->n_buckets >> TBL_L1_SHIFT;
+ u32 s_l1_idx = *bucketp >> TBL_L1_SHIFT;
+ u32 s_l2_idx = *bucketp & (TBL_L2_SIZE - 1);
+ u32 s_obj = *objp;
+ unsigned int l1_idx;
+
+ for (l1_idx = s_l1_idx; l1_idx < n_l1; l1_idx++) {
+ struct tbl_bucket __rcu **l2 = table->buckets[l1_idx];
+ unsigned int l2_idx;
+
+ for (l2_idx = s_l2_idx; l2_idx < TBL_L2_SIZE; l2_idx++) {
+ struct tbl_bucket *bucket;
+
+ bucket = get_bucket_protected(l2[l2_idx]);
+ if (bucket && s_obj < bucket->n_objs) {
+ *bucketp = (l1_idx << TBL_L1_SHIFT) + l2_idx;
+ *objp = s_obj + 1;
+ return bucket->objs[s_obj];
+ }
- find_buckets(new_table, &flow->key, buckets);
- for (i = 0; i < 2; i++) {
- if (!*buckets[i]) {
- rcu_assign_pointer(*buckets[i], flow);
- return 0;
+ /* Start offsets only apply to the first bucket visited;
+ * later buckets are scanned from the beginning. */
+ s_obj = 0;
}
+ s_l2_idx = 0;
}
- WARN_ON_ONCE(1);
- return 0;
+ *bucketp = 0;
+ *objp = 0;
+ return NULL;
}
-static void dp_free_table_rcu(struct rcu_head *rcu)
+/* tbl_foreach() callback used by tbl_expand(): reinserts @node into the
+ * new table, reusing the hash cached in node->hash. */
+static int insert_table_flow(struct tbl_node *node, void *new_table_)
{
- struct dp_table *table = container_of(rcu, struct dp_table, rcu);
- dp_table_destroy(table, 0);
+ struct tbl *new_table = new_table_;
+ return tbl_insert(new_table, node, node->hash);
}
-int dp_table_expand(struct datapath *dp)
+/**
+ * tbl_expand - create a hash table with more buckets
+ * @table: table to expand
+ *
+ * Creates a new table containing the same objects as @table but with twice
+ * as many buckets. Returns the new table, or an ERR_PTR on failure
+ * (-ENOSPC if the table is already at %TBL_MAX_BUCKETS, -ENOMEM if
+ * allocation fails). The caller should free @table upon success (probably
+ * using tbl_deferred_destroy()).
+ */
+struct tbl *tbl_expand(struct tbl *table)
{
- struct dp_table *old_table = rcu_dereference(dp->table);
- struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2);
+ int err;
+ int n_buckets = table->n_buckets * 2;
+ struct tbl *new_table;
+
+ if (n_buckets >= TBL_MAX_BUCKETS) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = -ENOMEM;
+ /* Create the doubled table; TBL_MIN_BUCKETS here would silently
+ * produce an undersized table instead of expanding. */
+ new_table = tbl_create(n_buckets);
 if (!new_table)
- return -ENOMEM;
- dp_table_foreach(old_table, insert_flow, new_table);
- rcu_assign_pointer(dp->table, new_table);
- call_rcu(&old_table->rcu, dp_free_table_rcu);
- return 0;
+ goto error;
+
+ if (tbl_foreach(table, insert_table_flow, new_table))
+ goto error_free_new_table;
+
+ return new_table;
+
+error_free_new_table:
+ tbl_destroy(new_table, NULL);
+error:
+ return ERR_PTR(err);
}
-static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
+/**
+ * tbl_n_buckets - returns the number of buckets
+ * @table: table to examine
+ *
+ * Returns the number of buckets currently allocated in @table, useful when
+ * deciding whether to expand.
+ *
+ * NOTE(review): returns int while n_buckets is stored unsigned — fine as
+ * long as bucket counts stay below INT_MAX; confirm against TBL_MAX_BUCKETS.
+ */
+int tbl_n_buckets(struct tbl *table)
{
- struct dp_table *table = container_of(rcu, struct dp_table, rcu);
- dp_table_destroy(table, 1);
+ return table->n_buckets;
}
-int dp_table_flush(struct datapath *dp)
+/* RCU callback that frees a replaced bucket once all readers that might
+ * still reference it have finished. */
+static void free_bucket_rcu(struct rcu_head *rcu)
{
- struct dp_table *old_table = rcu_dereference(dp->table);
- struct dp_table *new_table = dp_table_create(DP_L1_SIZE);
- if (!new_table)
- return -ENOMEM;
- rcu_assign_pointer(dp->table, new_table);
- call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu);
- return 0;
+ struct tbl_bucket *bucket = container_of(rcu, struct tbl_bucket, rcu);
+ kfree(bucket);
}
-struct sw_flow **
-dp_table_lookup_for_insert(struct dp_table *table,
- const struct odp_flow_key *target)
+/**
+ * tbl_insert - insert object into table
+ * @table: table in which to insert object
+ * @target: tbl_node contained in object to insert
+ * @hash: hash of object to insert
+ *
+ * The caller must ensure that no object considered to be identical to @target
+ * already exists in @table. Returns 0 or a negative error (currently just
+ * -ENOMEM).
+ */
+int tbl_insert(struct tbl *table, struct tbl_node *target, u32 hash)
{
- struct sw_flow **buckets[2];
- struct sw_flow **empty_bucket = NULL;
- int i;
+ struct tbl_bucket __rcu **oldp = find_bucket(table, hash);
+ struct tbl_bucket *old = get_bucket_protected(*oldp);
+ unsigned int n = old ? old->n_objs : 0;
+ struct tbl_bucket *new = bucket_alloc(n + 1);
- find_buckets(table, target, buckets);
- for (i = 0; i < 2; i++) {
- struct sw_flow *f = rcu_dereference(*buckets[i]);
- if (f) {
- if (!memcmp(&f->key, target, sizeof(struct odp_flow_key)))
- return buckets[i];
- } else if (!empty_bucket)
- empty_bucket = buckets[i];
- }
- return empty_bucket;
+ if (!new)
+ return -ENOMEM;
+
+ target->hash = hash;
+
+ new->n_objs = n + 1;
+ if (old)
+ memcpy(new->objs, old->objs, n * sizeof(struct tbl_node *));
+ new->objs[n] = target;
+
+ /* Copy-then-publish: readers see either the old or the new bucket;
+ * the old one is freed only after a grace period. */
+ rcu_assign_pointer(*oldp, new);
+ if (old)
+ call_rcu(&old->rcu, free_bucket_rcu);
+
+ table->count++;
+
+ return 0;
}
-int dp_table_delete(struct dp_table *table, struct sw_flow *target)
+/**
+ * tbl_remove - remove object from table
+ * @table: table from which to remove object
+ * @target: tbl_node inside of object to remove
+ *
+ * The caller must ensure that @target itself is in @table. (It is not
+ * good enough for @table to contain a different object considered identical
+ * @target.)
+ *
+ * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is*
+ * possible for object deletion to fail due to lack of memory.
+ */
+int tbl_remove(struct tbl *table, struct tbl_node *target)
{
- struct sw_flow **buckets[2];
- int i;
+ struct tbl_bucket __rcu **oldp = find_bucket(table, target->hash);
+ struct tbl_bucket *old = get_bucket_protected(*oldp);
+ unsigned int n = old->n_objs;
+ struct tbl_bucket *new;
+
+ if (n > 1) {
+ unsigned int i;
+
+ /* Build a replacement bucket with every object but @target. */
+ new = bucket_alloc(n - 1);
+ if (!new)
+ return -ENOMEM;
- find_buckets(table, &target->key, buckets);
- for (i = 0; i < 2; i++) {
- struct sw_flow *flow = rcu_dereference(*buckets[i]);
- if (flow == target) {
- rcu_assign_pointer(*buckets[i], NULL);
- return 0;
+ new->n_objs = 0;
+ for (i = 0; i < n; i++) {
+ struct tbl_node *obj = old->objs[i];
+ if (obj != target)
+ new->objs[new->n_objs++] = obj;
}
+ WARN_ON_ONCE(new->n_objs != n - 1);
+ } else {
+ new = NULL;
}
- return -ENOENT;
+
+ rcu_assign_pointer(*oldp, new);
+ call_rcu(&old->rcu, free_bucket_rcu);
+
+ table->count--;
+
+ return 0;
+}
+
+/**
+ * tbl_count - retrieves the number of stored objects
+ * @table: table to count
+ *
+ * Returns the number of objects that have been inserted into the hash table,
+ * as maintained by tbl_insert()/tbl_remove().
+ */
+unsigned int tbl_count(struct tbl *table)
+{
+ return table->count;
+}