/*
 * Copyright (c) 2009 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#include "flow.h"
#include "datapath.h"
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>

static inline int bucket_size(int n_flows)
{
	return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows;
}

static struct dp_bucket *dp_bucket_alloc(int n_flows)
{
	return kmalloc(bucket_size(n_flows), GFP_KERNEL);
}

static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets,
			 int free_flows)
{
	unsigned int i;

	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
		struct dp_bucket **l2 = l1[i];
		unsigned int j;

		for (j = 0; j < DP_L1_SIZE; j++) {
			struct dp_bucket *bucket = l2[j];
			if (!bucket)
				continue;

			if (free_flows) {
				unsigned int k;
				for (k = 0; k < bucket->n_flows; k++)
					flow_free(bucket->flows[k]);
			}
			kfree(bucket);
		}
		free_page((unsigned long)l2);
	}
	kfree(l1);
}

static struct dp_bucket ***alloc_buckets(unsigned int n_buckets)
{
	struct dp_bucket ***l1;
	unsigned int i;

	l1 = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct dp_bucket**),
		     GFP_KERNEL);
	if (!l1)
		return NULL;
	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
		l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL);
		if (!l1[i]) {
			free_buckets(l1, i << DP_L1_BITS, 0);
			return NULL;
		}
	}
	return l1;
}

/**
 * dp_table_create - create and return a new flow table
 * @n_buckets: number of buckets in the new table
 *
 * Creates and returns a new flow table, or %NULL if memory cannot be
 * allocated.  @n_buckets must be a power of 2 in the range %DP_L1_SIZE to
 * %DP_MAX_BUCKETS.
 */
struct dp_table *dp_table_create(unsigned int n_buckets)
{
	struct dp_table *table;

	table = kzalloc(sizeof *table, GFP_KERNEL);
	if (!table)
		goto err;

	table->n_buckets = n_buckets;
	table->buckets = alloc_buckets(n_buckets);
	if (!table->buckets)
		goto err_free_table;
	get_random_bytes(&table->hash_seed, sizeof table->hash_seed);

	return table;

err_free_table:
	kfree(table);
err:
	return NULL;
}
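/*
 * Example (illustrative sketch only, not part of the original file, hence
 * compiled out): the basic create/destroy lifecycle.  It assumes the
 * dp_table_*() prototypes are visible via "datapath.h" and that the caller
 * serializes table updates, as the rest of the datapath does with dp_mutex.
 * The helper name example_table_lifecycle() is hypothetical.
 */
#if 0
static int example_table_lifecycle(void)
{
	/* DP_L1_SIZE is the smallest bucket count dp_table_create() accepts. */
	struct dp_table *table = dp_table_create(DP_L1_SIZE);

	if (!table)
		return -ENOMEM;

	/* ... populate with dp_table_insert(), query with dp_table_lookup() ... */

	/* Nonzero free_flows also frees any flows still held in the table. */
	dp_table_destroy(table, 1);
	return 0;
}
#endif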
/**
 * dp_table_destroy - destroy flow table and optionally the flows it contains
 * @table: table to destroy (must not be %NULL)
 * @free_flows: whether to destroy the flows
 *
 * If @free_flows is zero, then the buckets in @table are destroyed but not the
 * flows within those buckets.  This behavior is useful when a table is being
 * replaced by a larger or smaller one without destroying the flows.
 *
 * If @free_flows is nonzero, then the flows in @table are destroyed as well as
 * the buckets.
 */
void dp_table_destroy(struct dp_table *table, int free_flows)
{
	free_buckets(table->buckets, table->n_buckets, free_flows);
	kfree(table);
}

static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash)
{
	unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
	unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
	return &table->buckets[l1][l2];
}

static int search_bucket(const struct dp_bucket *bucket,
			 const struct odp_flow_key *key)
{
	int i;

	for (i = 0; i < bucket->n_flows; i++) {
		struct sw_flow *flow = rcu_dereference(bucket->flows[i]);
		if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
			return i;
	}

	return -1;
}

static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash,
				   const struct odp_flow_key *key)
{
	struct dp_bucket **bucketp = find_bucket(table, hash);
	struct dp_bucket *bucket = rcu_dereference(*bucketp);
	int index;

	if (!bucket)
		return NULL;

	index = search_bucket(bucket, key);
	if (index < 0)
		return NULL;

	return bucket->flows[index];
}

static u32 flow_hash(const struct dp_table *table,
		     const struct odp_flow_key *key)
{
	return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed);
}

/**
 * dp_table_lookup - searches flow table for a matching flow
 * @table: flow table to search
 * @key: flow key for which to search
 *
 * Searches @table for a flow whose key is equal to @key.  Returns the flow if
 * successful, otherwise %NULL.
 */
struct sw_flow *dp_table_lookup(struct dp_table *table,
				const struct odp_flow_key *key)
{
	return lookup_flow(table, flow_hash(table, key), key);
}

/**
 * dp_table_foreach - iterate through flow table
 * @table: table to iterate
 * @callback: function to call for each flow entry
 * @aux: Extra data to pass to @callback
 *
 * Iterates through all of the flows in @table in hash order, passing each of
 * them in turn to @callback.  If @callback returns nonzero, this terminates
 * the iteration and dp_table_foreach() returns the same value.  Returns 0 if
 * @callback never returns nonzero.
 *
 * This function does not try to intelligently handle the case where @callback
 * adds or removes flows in @table.
 */
int dp_table_foreach(struct dp_table *table,
		     int (*callback)(struct sw_flow *flow, void *aux),
		     void *aux)
{
	unsigned int i, j, k;

	for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) {
		struct dp_bucket **l2 = table->buckets[i];
		for (j = 0; j < DP_L1_SIZE; j++) {
			struct dp_bucket *bucket = rcu_dereference(l2[j]);
			if (!bucket)
				continue;

			for (k = 0; k < bucket->n_flows; k++) {
				int error = (*callback)(bucket->flows[k], aux);
				if (error)
					return error;
			}
		}
	}
	return 0;
}
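/*
 * Example (illustrative sketch only, not part of the original file, hence
 * compiled out): a minimal dp_table_foreach() callback that counts flows.
 * The names count_flow() and example_count_flows() are hypothetical.
 */
#if 0
static int count_flow(struct sw_flow *flow, void *aux)
{
	unsigned int *count = aux;

	(*count)++;
	return 0;		/* Returning nonzero would stop the walk early. */
}

static unsigned int example_count_flows(struct dp_table *table)
{
	unsigned int count = 0;

	dp_table_foreach(table, count_flow, &count);
	return count;
}
#endif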
static int insert_flow(struct sw_flow *flow, void *new_table_)
{
	struct dp_table *new_table = new_table_;
	return dp_table_insert(new_table, flow);
}

static void dp_free_table_rcu(struct rcu_head *rcu)
{
	struct dp_table *table = container_of(rcu, struct dp_table, rcu);
	dp_table_destroy(table, 0);
}

/**
 * dp_table_expand - replace datapath's flow table by one with more buckets
 * @dp: datapath to expand
 *
 * Replaces @dp's flow table by one that has twice as many buckets.  All of the
 * flows in @dp's flow table are moved to the new flow table.  Returns 0 if
 * successful, otherwise a negative error.
 */
int dp_table_expand(struct datapath *dp)
{
	struct dp_table *old_table = rcu_dereference(dp->table);
	struct dp_table *new_table;

	new_table = dp_table_create(old_table->n_buckets * 2);
	if (!new_table)
		goto error;

	if (dp_table_foreach(old_table, insert_flow, new_table))
		goto error_free_new_table;

	rcu_assign_pointer(dp->table, new_table);
	call_rcu(&old_table->rcu, dp_free_table_rcu);
	return 0;

error_free_new_table:
	dp_table_destroy(new_table, 0);
error:
	return -ENOMEM;
}

static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
{
	struct dp_table *table = container_of(rcu, struct dp_table, rcu);
	dp_table_destroy(table, 1);
}

/**
 * dp_table_flush - clear datapath's flow table
 * @dp: datapath to clear
 *
 * Replaces @dp's flow table by an empty flow table, destroying all the flows
 * in the old table (after a suitable RCU grace period).
 */
int dp_table_flush(struct datapath *dp)
{
	struct dp_table *old_table = rcu_dereference(dp->table);
	struct dp_table *new_table = dp_table_create(DP_L1_SIZE);
	if (!new_table)
		return -ENOMEM;
	rcu_assign_pointer(dp->table, new_table);
	call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu);
	return 0;
}

static void dp_free_bucket_rcu(struct rcu_head *rcu)
{
	struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu);
	kfree(bucket);
}

/**
 * dp_table_insert - insert flow into table
 * @table: table in which to insert flow
 * @target: flow to insert
 *
 * The caller must ensure that no flow with key identical to @target->key
 * already exists in @table.  Returns 0 or a negative error (currently just
 * -ENOMEM).
 *
 * The caller is responsible for updating &struct datapath's n_flows member.
 */
int dp_table_insert(struct dp_table *table, struct sw_flow *target)
{
	u32 hash = flow_hash(table, &target->key);
	struct dp_bucket **oldp = find_bucket(table, hash);
	struct dp_bucket *old = *rcu_dereference(oldp);
	unsigned int n = old ? old->n_flows : 0;
	struct dp_bucket *new = dp_bucket_alloc(n + 1);

	if (!new)
		return -ENOMEM;

	new->n_flows = n + 1;
	if (old)
		memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*));
	new->flows[n] = target;

	rcu_assign_pointer(*oldp, new);
	if (old)
		call_rcu(&old->rcu, dp_free_bucket_rcu);

	return 0;
}

/**
 * dp_table_delete - remove flow from table
 * @table: table from which to remove flow
 * @target: flow to remove
 *
 * The caller must ensure that @target itself is in @table.  (It is not
 * good enough for @table to contain a different flow with a key equal to
 * @target's key.)
 *
 * Returns 0 or a negative error (currently just -ENOMEM).  Yes, it *is*
 * possible for a flow deletion to fail due to lack of memory.
 *
 * The caller is responsible for updating &struct datapath's n_flows member.
 */
int dp_table_delete(struct dp_table *table, struct sw_flow *target)
{
	u32 hash = flow_hash(table, &target->key);
	struct dp_bucket **oldp = find_bucket(table, hash);
	struct dp_bucket *old = *rcu_dereference(oldp);
	unsigned int n = old->n_flows;
	struct dp_bucket *new;

	if (n > 1) {
		unsigned int i;

		new = dp_bucket_alloc(n - 1);
		if (!new)
			return -ENOMEM;

		new->n_flows = 0;
		for (i = 0; i < n; i++) {
			struct sw_flow *flow = old->flows[i];
			if (flow != target)
				new->flows[new->n_flows++] = flow;
		}
		WARN_ON_ONCE(new->n_flows != n - 1);
	} else {
		new = NULL;
	}

	rcu_assign_pointer(*oldp, new);
	call_rcu(&old->rcu, dp_free_bucket_rcu);

	return 0;
}
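/*
 * Example (illustrative sketch only, not part of the original file, hence
 * compiled out): replacing a flow by deleting any existing entry with the
 * same key before inserting, since dp_table_insert() requires that no
 * duplicate key be present.  It assumes the caller holds the datapath
 * mutex, so the lookup cannot race with another writer; pure readers would
 * instead wrap dp_table_lookup() in rcu_read_lock()/rcu_read_unlock().
 * example_replace_flow() is hypothetical, and freeing the displaced flow
 * (after an RCU grace period) plus the &struct datapath n_flows bookkeeping
 * are left to the caller.
 */
#if 0
static int example_replace_flow(struct dp_table *table,
				struct sw_flow *new_flow)
{
	struct sw_flow *old_flow = dp_table_lookup(table, &new_flow->key);

	if (old_flow) {
		/* Deletion can fail with -ENOMEM, as documented above. */
		int error = dp_table_delete(table, old_flow);
		if (error)
			return error;
	}
	return dp_table_insert(table, new_flow);
}
#endif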