#define TBL_MIN_BUCKETS 1024
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_HASH_SHIFT 8
+#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
{
struct table_instance *ti;
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+ MC_HASH_ENTRIES, __alignof__(struct mask_cache_entry));
+ if (!table->mask_cache)
+ return -ENOMEM;
- if (!ti)
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!ti) {
+ free_percpu(table->mask_cache);
return -ENOMEM;
+ }
rcu_assign_pointer(table->ti, ti);
INIT_LIST_HEAD(&table->mask_list);
{
struct table_instance *ti = (struct table_instance __force *)table->ti;
+ free_percpu(table->mask_cache);
table_instance_destroy(ti, false);
}
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- struct sw_flow_mask *mask)
+ struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
{
struct sw_flow *flow;
struct hlist_head *head;
ovs_flow_mask_key(&masked_key, unmasked, mask);
hash = flow_hash(&masked_key, key_start, key_end);
head = find_bucket(ti, hash);
+ (*n_mask_hit)++;
hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
if (flow->mask == mask && flow->hash == hash &&
flow_cmp_masked_key(flow, &masked_key,
return NULL;
}
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 *n_mask_hit)
+
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+ struct table_instance *ti,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
- *n_mask_hit = 0;
list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- (*n_mask_hit)++;
- flow = masked_flow_lookup(ti, key, mask);
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) /* Found */
return flow;
}
return NULL;
}
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 skb_hash,
+ u32 *n_mask_hit)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_cache_entry *entries, *ce, *del;
+ struct sw_flow *flow;
+ u32 hash = skb_hash;
+ int seg;
+
+ *n_mask_hit = 0;
+ if (unlikely(!skb_hash))
+ return flow_lookup(tbl, ti, key, n_mask_hit);
+
+ del = NULL;
+ entries = this_cpu_ptr(tbl->mask_cache);
+
+ for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+ int index;
+
+ index = hash & (MC_HASH_ENTRIES - 1);
+ ce = &entries[index];
+
+ if (ce->skb_hash == skb_hash) {
+ struct sw_flow_mask *mask;
+ int i;
+
+ i = 0;
+ list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
+ if (ce->mask_index == i++) {
+ flow = masked_flow_lookup(ti, key, mask,
+ n_mask_hit);
+ if (flow) /* Found */
+ return flow;
+
+ break;
+ }
+ }
+ del = ce;
+ break;
+ }
+
+ if (!del || (del->skb_hash && !ce->skb_hash)) {
+ del = ce;
+ }
+
+ hash >>= MC_HASH_SHIFT;
+ }
+
+ flow = flow_lookup(tbl, ti, key, n_mask_hit);
+
+ if (flow) {
+ del->skb_hash = skb_hash;
+ del->mask_index = (*n_mask_hit - 1);
+ }
+ return flow;
+}
+
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
u32 __always_unused n_mask_hit;
- return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+ n_mask_hit = 0;
+ return flow_lookup(tbl, ti, key, &n_mask_hit);
}
int ovs_flow_tbl_num_masks(const struct flow_table *table)
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_rcu(&mask->list, &tbl->mask_list);
+ list_add_tail_rcu(&mask->list, &tbl->mask_list);
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;