Make dumping large numbers of flows possible.
[sliver-openvswitch.git] / datapath / table-hash.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 #include "table.h"
8 #include "crc32.h"
9 #include "flow.h"
10 #include "datapath.h"
11
12 #include <linux/slab.h>
13 #include <linux/vmalloc.h>
14 #include <linux/mm.h>
15 #include <linux/highmem.h>
16 #include <asm/pgtable.h>
17
18 static void *kmem_alloc(size_t);
19 static void *kmem_zalloc(size_t);
20 static void kmem_free(void *, size_t);
21
/* An exact-match hash table with exactly one flow per bucket: a colliding
 * insert either replaces the occupant (same key) or is rejected. */
struct sw_table_hash {
        struct sw_table swt;      /* Base table; must be first (code casts
                                   * sw_table * to sw_table_hash *). */
        spinlock_t lock;          /* Serializes writers on the insert path. */
        struct crc32 crc32;       /* CRC-32 state used as the hash function. */
        atomic_t n_flows;         /* Current number of flows in the table. */
        unsigned int bucket_mask; /* Number of buckets minus 1. */
        struct sw_flow **buckets; /* Array of bucket_mask + 1 flow pointers. */
};
30
31 static struct sw_flow **find_bucket(struct sw_table *swt,
32                                                                         const struct sw_flow_key *key)
33 {
34         struct sw_table_hash *th = (struct sw_table_hash *) swt;
35         unsigned int crc = crc32_calculate(&th->crc32, key, sizeof *key);
36         return &th->buckets[crc & th->bucket_mask];
37 }
38
39 static struct sw_flow *table_hash_lookup(struct sw_table *swt,
40                                                                                  const struct sw_flow_key *key)
41 {
42         struct sw_flow *flow = *find_bucket(swt, key);
43         return flow && !memcmp(&flow->key, key, sizeof *key) ? flow : NULL;
44 }
45
46 static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow)
47 {
48         struct sw_table_hash *th = (struct sw_table_hash *) swt;
49         struct sw_flow **bucket;
50         unsigned long int flags;
51         int retval;
52
53         if (flow->key.wildcards != 0)
54                 return 0;
55
56         spin_lock_irqsave(&th->lock, flags);
57         bucket = find_bucket(swt, &flow->key);
58         if (*bucket == NULL) {
59                 atomic_inc(&th->n_flows);
60                 rcu_assign_pointer(*bucket, flow);
61                 retval = 1;
62         } else {
63                 struct sw_flow *old_flow = *bucket;
64                 if (!memcmp(&old_flow->key, &flow->key, sizeof flow->key)
65                                         && flow_del(old_flow)) {
66                         rcu_assign_pointer(*bucket, flow);
67                         flow_deferred_free(old_flow);
68                         retval = 1;
69                 } else {
70                         retval = 0;
71                 }
72         }
73         spin_unlock_irqrestore(&th->lock, flags);
74         return retval;
75 }
76
77 /* Caller must update n_flows. */
78 static int do_delete(struct sw_flow **bucket, struct sw_flow *flow)
79 {
80         if (flow_del(flow)) {
81                 rcu_assign_pointer(*bucket, NULL);
82                 flow_deferred_free(flow);
83                 return 1;
84         }
85         return 0;
86 }
87
88 /* Returns number of deleted flows. */
89 static int table_hash_delete(struct sw_table *swt,
90                                                          const struct sw_flow_key *key, int strict)
91 {
92         struct sw_table_hash *th = (struct sw_table_hash *) swt;
93         unsigned int count = 0;
94
95         if (key->wildcards == 0) {
96                 struct sw_flow **bucket = find_bucket(swt, key);
97                 struct sw_flow *flow = *bucket;
98                 if (flow && !memcmp(&flow->key, key, sizeof *key))
99                         count = do_delete(bucket, flow);
100         } else {
101                 unsigned int i;
102
103                 for (i = 0; i <= th->bucket_mask; i++) {
104                         struct sw_flow **bucket = &th->buckets[i];
105                         struct sw_flow *flow = *bucket;
106                         if (flow && flow_del_matches(&flow->key, key, strict))
107                                 count += do_delete(bucket, flow);
108                 }
109         }
110         if (count)
111                 atomic_sub(count, &th->n_flows);
112         return count;
113 }
114
115 static int table_hash_timeout(struct datapath *dp, struct sw_table *swt)
116 {
117         struct sw_table_hash *th = (struct sw_table_hash *) swt;
118         unsigned int i;
119         int count = 0;
120
121         for (i = 0; i <= th->bucket_mask; i++) {
122                 struct sw_flow **bucket = &th->buckets[i];
123                 struct sw_flow *flow = *bucket;
124                 if (flow && flow_timeout(flow)) {
125                         count += do_delete(bucket, flow); 
126                         if (dp->flags & OFPC_SEND_FLOW_EXP)
127                                 dp_send_flow_expired(dp, flow);
128                 }
129         }
130
131         if (count)
132                 atomic_sub(count, &th->n_flows);
133         return count;
134 }
135
136 static void table_hash_destroy(struct sw_table *swt)
137 {
138         struct sw_table_hash *th = (struct sw_table_hash *) swt;
139         unsigned int i;
140         for (i = 0; i <= th->bucket_mask; i++)
141         if (th->buckets[i])
142                 flow_free(th->buckets[i]);
143         kmem_free(th->buckets, (th->bucket_mask + 1) * sizeof *th->buckets);
144         kfree(th);
145 }
146
147 static int table_hash_iterate(struct sw_table *swt,
148                               const struct sw_flow_key *key,
149                               struct sw_table_position *position,
150                               int (*callback)(struct sw_flow *, void *private),
151                               void *private) 
152 {
153         struct sw_table_hash *th = (struct sw_table_hash *) swt;
154
155         if (position->private[0] > th->bucket_mask)
156                 return 0;
157
158         if (key->wildcards == 0) {
159                 struct sw_flow *flow = table_hash_lookup(swt, key);
160                 position->private[0] = -1;
161                 return flow ? callback(flow, private) : 0;
162         } else {
163                 int i;
164
165                 for (i = position->private[0]; i <= th->bucket_mask; i++) {
166                         struct sw_flow *flow = th->buckets[i];
167                         if (flow && flow_matches(key, &flow->key)) {
168                                 int error = callback(flow, private);
169                                 if (error) {
170                                         position->private[0] = i + 1;
171                                         return error;
172                                 }
173                         }
174                 }
175                 return 0;
176         }
177 }
178 static void table_hash_stats(struct sw_table *swt,
179                                  struct sw_table_stats *stats) 
180 {
181         struct sw_table_hash *th = (struct sw_table_hash *) swt;
182         stats->name = "hash";
183         stats->n_flows = atomic_read(&th->n_flows);
184         stats->max_flows = th->bucket_mask + 1;
185 }
186
187 struct sw_table *table_hash_create(unsigned int polynomial,
188                         unsigned int n_buckets)
189 {
190         struct sw_table_hash *th;
191         struct sw_table *swt;
192
193         th = kmalloc(sizeof *th, GFP_KERNEL);
194         if (th == NULL)
195                 return NULL;
196
197         BUG_ON(n_buckets & (n_buckets - 1));
198         th->buckets = kmem_zalloc(n_buckets * sizeof *th->buckets);
199         if (th->buckets == NULL) {
200                 printk("failed to allocate %u buckets\n", n_buckets);
201                 kfree(th);
202                 return NULL;
203         }
204         th->bucket_mask = n_buckets - 1;
205
206         swt = &th->swt;
207         swt->lookup = table_hash_lookup;
208         swt->insert = table_hash_insert;
209         swt->delete = table_hash_delete;
210         swt->timeout = table_hash_timeout;
211         swt->destroy = table_hash_destroy;
212         swt->iterate = table_hash_iterate;
213         swt->stats = table_hash_stats;
214
215         spin_lock_init(&th->lock);
216         crc32_init(&th->crc32, polynomial);
217         atomic_set(&th->n_flows, 0);
218
219         return swt;
220 }
221
/* Double-hashing table. */

/* Two independent hash tables (created with separate CRC polynomials, see
 * table_hash2_create()); an insert that collides in subtable[0] is retried
 * in subtable[1], roughly halving collision losses. */
struct sw_table_hash2 {
        struct sw_table swt;        /* Base table; must be first (code casts
                                     * sw_table * to sw_table_hash2 *). */
        struct sw_table *subtable[2];
};
228
229 static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
230                                                                                   const struct sw_flow_key *key)
231 {
232         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
233         int i;
234         
235         for (i = 0; i < 2; i++) {
236                 struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
237                 if (flow && !memcmp(&flow->key, key, sizeof *key))
238                         return flow;
239         }
240         return NULL;
241 }
242
243 static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
244 {
245         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
246
247         if (table_hash_insert(t2->subtable[0], flow))
248                 return 1;
249         return table_hash_insert(t2->subtable[1], flow);
250 }
251
252 static int table_hash2_delete(struct sw_table *swt,
253                                                           const struct sw_flow_key *key, int strict)
254 {
255         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
256         return (table_hash_delete(t2->subtable[0], key, strict)
257                         + table_hash_delete(t2->subtable[1], key, strict));
258 }
259
260 static int table_hash2_timeout(struct datapath *dp, struct sw_table *swt)
261 {
262         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
263         return (table_hash_timeout(dp, t2->subtable[0])
264                         + table_hash_timeout(dp, t2->subtable[1]));
265 }
266
267 static void table_hash2_destroy(struct sw_table *swt)
268 {
269         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
270         table_hash_destroy(t2->subtable[0]);
271         table_hash_destroy(t2->subtable[1]);
272         kfree(t2);
273 }
274
275 static int table_hash2_iterate(struct sw_table *swt,
276                                const struct sw_flow_key *key,
277                                struct sw_table_position *position,
278                                int (*callback)(struct sw_flow *, void *),
279                                void *private)
280 {
281         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
282         int i;
283
284         for (i = position->private[1]; i < 2; i++) {
285                 int error = table_hash_iterate(t2->subtable[i], key, position,
286                                                callback, private);
287                 if (error) {
288                         return error;
289                 }
290                 position->private[0] = 0;
291                 position->private[1]++;
292         }
293         return 0;
294 }
295
296 static void table_hash2_stats(struct sw_table *swt,
297                                  struct sw_table_stats *stats)
298 {
299         struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
300         struct sw_table_stats substats[2];
301         int i;
302
303         for (i = 0; i < 2; i++)
304                 table_hash_stats(t2->subtable[i], &substats[i]);
305         stats->name = "hash2";
306         stats->n_flows = substats[0].n_flows + substats[1].n_flows;
307         stats->max_flows = substats[0].max_flows + substats[1].max_flows;
308 }
309
310 struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
311                                                                         unsigned int poly1, unsigned int buckets1)
312
313 {
314         struct sw_table_hash2 *t2;
315         struct sw_table *swt;
316
317         t2 = kmalloc(sizeof *t2, GFP_KERNEL);
318         if (t2 == NULL)
319                 return NULL;
320
321         t2->subtable[0] = table_hash_create(poly0, buckets0);
322         if (t2->subtable[0] == NULL)
323                 goto out_free_t2;
324
325         t2->subtable[1] = table_hash_create(poly1, buckets1);
326         if (t2->subtable[1] == NULL)
327                 goto out_free_subtable0;
328
329         swt = &t2->swt;
330         swt->lookup = table_hash2_lookup;
331         swt->insert = table_hash2_insert;
332         swt->delete = table_hash2_delete;
333         swt->timeout = table_hash2_timeout;
334         swt->destroy = table_hash2_destroy;
335         swt->iterate = table_hash2_iterate;
336         swt->stats = table_hash2_stats;
337
338         return swt;
339
340 out_free_subtable0:
341         table_hash_destroy(t2->subtable[0]);
342 out_free_t2:
343         kfree(t2);
344         return NULL;
345 }
346
347 /* From fs/xfs/linux-2.4/kmem.c. */
348
349 static void *
350 kmem_alloc(size_t size)
351 {
352         void *ptr;
353
354 #ifdef KMALLOC_MAX_SIZE
355         if (size > KMALLOC_MAX_SIZE)
356                 return NULL;
357 #endif
358         ptr = kmalloc(size, GFP_KERNEL);
359         if (!ptr) {
360                 ptr = vmalloc(size);
361                 if (ptr)
362                         printk("openflow: used vmalloc for %lu bytes\n", 
363                                         (unsigned long)size);
364         }
365         return ptr;
366 }
367
368 static void *
369 kmem_zalloc(size_t size)
370 {
371         void *ptr = kmem_alloc(size);
372         if (ptr)
373                 memset(ptr, 0, size);
374         return ptr;
375 }
376
377 static void
378 kmem_free(void *ptr, size_t size)
379 {
380         if (((unsigned long)ptr < VMALLOC_START) ||
381                 ((unsigned long)ptr >= VMALLOC_END)) {
382                 kfree(ptr);
383         } else {
384                 vfree(ptr);
385         }
386 }