Be more liberal (egads!) about allocating space for netlink messages.
[sliver-openvswitch.git] / datapath / datapath.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 /* Functions for managing the dp interface/device. */
8
9 #include <linux/module.h>
10 #include <linux/if_arp.h>
11 #include <linux/if_bridge.h>
12 #include <linux/if_vlan.h>
13 #include <linux/in.h>
14 #include <net/genetlink.h>
15 #include <linux/ip.h>
16 #include <linux/delay.h>
17 #include <linux/etherdevice.h>
18 #include <linux/kernel.h>
19 #include <linux/kthread.h>
20 #include <linux/mutex.h>
21 #include <linux/rtnetlink.h>
22 #include <linux/rcupdate.h>
23 #include <linux/version.h>
24 #include <linux/ethtool.h>
25 #include <linux/random.h>
26 #include <asm/system.h>
27 #include <linux/netfilter_bridge.h>
28 #include <linux/inetdevice.h>
29 #include <linux/list.h>
30
31 #include "openflow-netlink.h"
32 #include "datapath.h"
33 #include "table.h"
34 #include "chain.h"
35 #include "forward.h"
36 #include "flow.h"
37 #include "datapath_t.h"
38
39 #include "compat.h"
40
41
42 /* Number of milliseconds between runs of the maintenance thread. */
43 #define MAINT_SLEEP_MSECS 1000
44
45 #define BRIDGE_PORT_NO_FLOOD    0x00000001 
46
47 #define UINT32_MAX                        4294967295U
48 #define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
49
/* One switch port: the binding between a datapath and a network device. */
struct net_bridge_port {
        u16     port_no;        /* OpenFlow port number (index into datapath.ports). */
        u32 flags;              /* BRIDGE_PORT_* bits, e.g. BRIDGE_PORT_NO_FLOOD. */
        struct datapath *dp;    /* Datapath this port belongs to. */
        struct net_device *dev; /* Underlying device; held via dev_hold(). */
        struct list_head node; /* Element in datapath.ports. */
};
57
58 static struct genl_family dp_genl_family;
59 static struct genl_multicast_group mc_group;
60
61 int dp_dev_setup(struct net_device *dev);  
62
63 /* It's hard to imagine wanting more than one datapath, but... */
64 #define DP_MAX 32
65
66 /* datapaths.  Protected on the read side by rcu_read_lock, on the write side
67  * by dp_mutex.
68  *
69  * It is safe to access the datapath and net_bridge_port structures with just
70  * the dp_mutex, but to access the chain you need to take the rcu_read_lock
71  * also (because dp_mutex doesn't prevent flows from being destroyed).
72  */
73 static struct datapath *dps[DP_MAX];
74 static DEFINE_MUTEX(dp_mutex);
75
76 static int dp_maint_func(void *data);
77 static int send_port_status(struct net_bridge_port *p, uint8_t status);
78
79
/* nla_unreserve - reduce amount of space reserved by nla_reserve
 * @skb: socket buffer from which to recover room
 * @nla: netlink attribute to adjust
 * @len: amount by which to reduce attribute payload
 *
 * Reduces amount of space reserved by a call to nla_reserve.
 *
 * No other attributes may be added between calling nla_reserve and this
 * function, since it will create a hole in the message.
 */
void nla_unreserve(struct sk_buff *skb, struct nlattr *nla, int len)
{
        /* Give 'len' bytes back to the skb and shrink the attribute's own
         * length field to match.
         * NOTE(review): direct arithmetic on skb->tail assumes it is a
         * pointer; on kernels built with NET_SKBUFF_DATA_USES_OFFSET it is
         * an offset -- confirm against the target kernel version. */
        skb->tail -= len;
        skb->len  -= len;

        nla->nla_len -= len;
}
97
98 /* Generates a unique datapath id.  It incorporates the datapath index
99  * and a hardware address, if available.  If not, it generates a random
100  * one.
101  */
102 static 
103 uint64_t gen_datapath_id(uint16_t dp_idx)
104 {
105         uint64_t id;
106         int i;
107         struct net_device *dev;
108
109         /* The top 16 bits are used to identify the datapath.  The lower 48 bits
110          * use an interface address.  */
111         id = (uint64_t)dp_idx << 48;
112         if ((dev = dev_get_by_name(&init_net, "ctl0")) 
113                         || (dev = dev_get_by_name(&init_net, "eth0"))) {
114                 for (i=0; i<ETH_ALEN; i++) {
115                         id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
116                 }
117                 dev_put(dev);
118         } else {
119                 /* Randomly choose the lower 48 bits if we cannot find an
120                  * address and mark the most significant bit to indicate that
121                  * this was randomly generated. */
122                 uint8_t rand[ETH_ALEN];
123                 get_random_bytes(rand, ETH_ALEN);
124                 id |= (uint64_t)1 << 63;
125                 for (i=0; i<ETH_ALEN; i++) {
126                         id |= (uint64_t)rand[i] << (8*(ETH_ALEN-1 - i));
127                 }
128         }
129
130         return id;
131 }
132
133 /* Creates a new datapath numbered 'dp_idx'.  Returns 0 for success or a
134  * negative error code.
135  *
136  * Not called with any locks. */
137 static int new_dp(int dp_idx)
138 {
139         struct datapath *dp;
140         int err;
141
142         if (dp_idx < 0 || dp_idx >= DP_MAX)
143                 return -EINVAL;
144
145         if (!try_module_get(THIS_MODULE))
146                 return -ENODEV;
147
148         mutex_lock(&dp_mutex);
149         dp = rcu_dereference(dps[dp_idx]);
150         if (dp != NULL) {
151                 err = -EEXIST;
152                 goto err_unlock;
153         }
154
155         err = -ENOMEM;
156         dp = kzalloc(sizeof *dp, GFP_KERNEL);
157         if (dp == NULL)
158                 goto err_unlock;
159
160         dp->dp_idx = dp_idx;
161         dp->id = gen_datapath_id(dp_idx);
162         dp->chain = chain_create(dp);
163         if (dp->chain == NULL)
164                 goto err_free_dp;
165         INIT_LIST_HEAD(&dp->port_list);
166
167 #if 0
168         /* Setup our "of" device */
169         dp->dev.priv = dp;
170         rtnl_lock();
171         err = dp_dev_setup(&dp->dev);
172         rtnl_unlock();
173         if (err != 0) 
174                 printk("datapath: problem setting up 'of' device\n");
175 #endif
176
177         dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
178
179         dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx);
180         if (IS_ERR(dp->dp_task))
181                 goto err_free_dp;
182
183         rcu_assign_pointer(dps[dp_idx], dp);
184         mutex_unlock(&dp_mutex);
185
186         return 0;
187
188 err_free_dp:
189         kfree(dp);
190 err_unlock:
191         mutex_unlock(&dp_mutex);
192         module_put(THIS_MODULE);
193                 return err;
194 }
195
196 /* Find and return a free port number under 'dp'.  Called under dp_mutex. */
197 static int find_portno(struct datapath *dp)
198 {
199         int i;
200         for (i = 0; i < OFPP_MAX; i++)
201                 if (dp->ports[i] == NULL)
202                         return i;
203         return -EXFULL;
204 }
205
206 static struct net_bridge_port *new_nbp(struct datapath *dp,
207                                                                            struct net_device *dev)
208 {
209         struct net_bridge_port *p;
210         int port_no;
211
212         port_no = find_portno(dp);
213         if (port_no < 0)
214                 return ERR_PTR(port_no);
215
216         p = kzalloc(sizeof(*p), GFP_KERNEL);
217         if (p == NULL)
218                 return ERR_PTR(-ENOMEM);
219
220         p->dp = dp;
221         dev_hold(dev);
222         p->dev = dev;
223         p->port_no = port_no;
224
225         return p;
226 }
227
228 /* Called with dp_mutex. */
229 int add_switch_port(struct datapath *dp, struct net_device *dev)
230 {
231         struct net_bridge_port *p;
232
233         if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
234                 return -EINVAL;
235
236         if (dev->br_port != NULL)
237                 return -EBUSY;
238
239         p = new_nbp(dp, dev);
240         if (IS_ERR(p))
241                 return PTR_ERR(p);
242
243         dev_hold(dev);
244         rcu_assign_pointer(dev->br_port, p);
245         rtnl_lock();
246         dev_set_promiscuity(dev, 1);
247         rtnl_unlock();
248
249         rcu_assign_pointer(dp->ports[p->port_no], p);
250         list_add_rcu(&p->node, &dp->port_list);
251
252         /* Notify the ctlpath that this port has been added */
253         send_port_status(p, OFPPR_ADD);
254
255         return 0;
256 }
257
/* Delete 'p' from switch.
 * Called with dp_mutex. */
static int del_switch_port(struct net_bridge_port *p)
{
        /* First drop references to device. */
        rtnl_lock();
        dev_set_promiscuity(p->dev, -1);
        rtnl_unlock();
        /* Unpublish the port so new RCU readers cannot find it. */
        list_del_rcu(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
        rcu_assign_pointer(p->dev->br_port, NULL);

        /* Then wait until no one is still using it, and destroy it. */
        synchronize_rcu();

        /* Notify the ctlpath that this port no longer exists */
        send_port_status(p, OFPPR_DELETE);

        /* Balances the dev_hold() taken when the port was created. */
        dev_put(p->dev);
        kfree(p);

        return 0;
}
281
/* Tears down datapath 'dp': stops its maintenance thread, removes all of its
 * ports, unpublishes it, and frees it once no RCU readers remain.
 * Called with dp_mutex. */
static void del_dp(struct datapath *dp)
{
        struct net_bridge_port *p, *n;

#if 0
        /* Unregister the "of" device of this dp */
        rtnl_lock();
        unregister_netdevice(&dp->dev);
        rtnl_unlock();
#endif

        /* Stop the maintenance thread before dismantling the chain it
         * periodically touches. */
        kthread_stop(dp->dp_task);

        /* Drop references to DP. */
        list_for_each_entry_safe (p, n, &dp->port_list, node)
                del_switch_port(p);
        rcu_assign_pointer(dps[dp->dp_idx], NULL);

        /* Wait until no longer in use, then destroy it. */
        synchronize_rcu();
        chain_destroy(dp->chain);
        kfree(dp);
        /* Balances try_module_get() from new_dp(). */
        module_put(THIS_MODULE);
}
307
308 static int dp_maint_func(void *data)
309 {
310         struct datapath *dp = (struct datapath *) data;
311
312         while (!kthread_should_stop()) {
313 #if 1
314                 chain_timeout(dp->chain);
315 #else
316                 int count = chain_timeout(dp->chain);
317                 chain_print_stats(dp->chain);
318                 if (count)
319                         printk("%d flows timed out\n", count);
320 #endif
321                 msleep_interruptible(MAINT_SLEEP_MSECS);
322         }
323                 
324         return 0;
325 }
326
327 /*
328  * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
329  * different set of devices!)  Returns 0 if *pskb should be processed further,
330  * 1 if *pskb is handled. */
331 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/* Called with rcu_read_lock. */
static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
                                         struct sk_buff *skb)
{
        struct ethhdr *eh = eth_hdr(skb);
        struct sk_buff *skb_local = NULL;


        /* Frames addressed to this port's own MAC bypass the switch;
         * returning the skb lets the kernel's normal input path handle it. */
        if (compare_ether_addr(eh->h_dest, skb->dev->dev_addr) == 0)
                return skb;

        /* Broadcast/multicast/local destinations get a clone so a copy can
         * still be delivered locally after the original enters the switch.
         * If skb_clone() fails, skb_local stays NULL and local delivery is
         * silently skipped (best effort). */
        if (is_broadcast_ether_addr(eh->h_dest)
                                || is_multicast_ether_addr(eh->h_dest)
                                || is_local_ether_addr(eh->h_dest))
                skb_local = skb_clone(skb, GFP_ATOMIC);

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        /* fwd_port_input() takes ownership of 'skb'. */
        fwd_port_input(p->dp->chain, skb, p->port_no);

        /* NULL tells the caller the frame was fully consumed. */
        return skb_local;
}
358 #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
359 static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
360 {
361         /* Push the Ethernet header back on. */
362         if ((*pskb)->protocol == htons(ETH_P_8021Q))
363                 skb_push(*pskb, VLAN_ETH_HLEN);
364         else
365                 skb_push(*pskb, ETH_HLEN);
366
367         fwd_port_input(p->dp->chain, *pskb, p->port_no);
368         return 1;
369 }
370 #else 
/* NB: This has only been tested on 2.4.35 */

/* Called without any locks (?) */
static void dp_frame_hook(struct sk_buff *skb)
{
        struct net_bridge_port *p = skb->dev->br_port;

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        if (p) {
                /* fwd_port_input() takes ownership of 'skb'; RCU protects
                 * the chain lookup against concurrent flow destruction. */
                rcu_read_lock();
                fwd_port_input(p->dp->chain, skb, p->port_no);
                rcu_read_unlock();
        } else
                /* Device is not one of our switch ports: drop the frame. */
                kfree_skb(skb);
}
391 #endif
392
393 /* Forwarding output path.
394  * Based on net/bridge/br_forward.c. */
395
396 /* Don't forward packets to originating port or with flooding disabled */
397 static inline int should_deliver(const struct net_bridge_port *p,
398                         const struct sk_buff *skb)
399 {
400         if ((skb->dev == p->dev) || (p->flags & BRIDGE_PORT_NO_FLOOD)) {
401                 return 0;
402         } 
403
404         return 1;
405 }
406
407 static inline unsigned packet_length(const struct sk_buff *skb)
408 {
409         int length = skb->len - ETH_HLEN;
410         if (skb->protocol == htons(ETH_P_8021Q))
411                 length -= VLAN_HLEN;
412         return length;
413 }
414
415 static int
416 flood(struct datapath *dp, struct sk_buff *skb)
417 {
418         struct net_bridge_port *p;
419         int prev_port;
420
421         prev_port = -1;
422         list_for_each_entry_rcu (p, &dp->port_list, node) {
423                 if (!should_deliver(p, skb))
424                         continue;
425                 if (prev_port != -1) {
426                         struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
427                         if (!clone) {
428                                 kfree_skb(skb);
429                                 return -ENOMEM;
430                         }
431                         dp_output_port(dp, clone, prev_port); 
432                 }
433                 prev_port = p->port_no;
434         }
435         if (prev_port != -1)
436                 dp_output_port(dp, skb, prev_port);
437         else
438                 kfree_skb(skb);
439
440         return 0;
441 }
442
443 /* Marks 'skb' as having originated from 'in_port' in 'dp'.
444    FIXME: how are devices reference counted? */
445 int dp_set_origin(struct datapath *dp, uint16_t in_port,
446                            struct sk_buff *skb)
447 {
448         if (in_port < OFPP_MAX && dp->ports[in_port]) {
449                 skb->dev = dp->ports[in_port]->dev;
450                 return 0;
451         }
452         return -ENOENT;
453 }
454
455 /* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
456  */
457 int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
458 {
459         struct net_bridge_port *p;
460         int len = skb->len;
461
462         BUG_ON(!skb);
463         if (out_port == OFPP_FLOOD)
464                 return flood(dp, skb);
465         else if (out_port == OFPP_CONTROLLER)
466                 return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
467                                                   OFPR_ACTION);
468         else if (out_port >= OFPP_MAX)
469                 goto bad_port;
470
471         p = dp->ports[out_port];
472         if (p == NULL)
473                 goto bad_port;
474
475         skb->dev = p->dev;
476         if (packet_length(skb) > skb->dev->mtu) {
477                 printk("dropped over-mtu packet: %d > %d\n",
478                                         packet_length(skb), skb->dev->mtu);
479                 kfree_skb(skb);
480                 return -E2BIG;
481         }
482
483         dev_queue_xmit(skb);
484
485         return len;
486
487 bad_port:
488         kfree_skb(skb);
489         if (net_ratelimit())
490                 printk("can't forward to bad port %d\n", out_port);
491         return -ENOENT;
492 }
493
/* Takes ownership of 'skb' and transmits it to 'dp''s control path.  If
 * 'buffer_id' != -1, then only the first 64 bytes of 'skb' are sent;
 * otherwise, all of 'skb' is sent.  'reason' indicates why 'skb' is being
 * sent. 'max_len' sets the maximum number of bytes that the caller
 * wants to be sent; a value of 0 indicates the entire packet should be
 * sent. */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb,
                           uint32_t buffer_id, size_t max_len, int reason)
{
        /* FIXME? packet_rcv_spkt in net/packet/af_packet.c does some stuff
           that we should possibly be doing here too. */
        /* FIXME?  Can we avoid creating a new skbuff in the case where we
         * forward the whole packet? */
        struct sk_buff *f_skb;
        struct nlattr *attr;
        struct ofp_packet_in *opi;
        size_t opi_len;
        size_t len, fwd_len;
        void *data;
        int err = -ENOMEM;

        /* fwd_len: number of packet bytes actually copied into the message.
         * Truncated to max_len only when the packet is buffered in the
         * datapath (buffer_id != -1). */
        fwd_len = skb->len;
        if ((buffer_id != (uint32_t) -1) && max_len)
                fwd_len = min(fwd_len, max_len);

        /* Space for the OFPT_PACKET_IN attribute plus the DP index attr. */
        len = nla_total_size(offsetof(struct ofp_packet_in, data) + fwd_len)
                                + nla_total_size(sizeof(uint32_t));

        f_skb = genlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
        if (!f_skb)
                goto error_free_skb;

        data = genlmsg_put(f_skb, 0, 0, &dp_genl_family, 0,
                                DP_GENL_C_OPENFLOW);
        if (data == NULL)
                goto error_free_f_skb;

        NLA_PUT_U32(f_skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        /* Build the ofp_packet_in header directly in the reserved
         * attribute payload; all multi-byte fields in network byte order. */
        opi_len = offsetof(struct ofp_packet_in, data) + fwd_len;
        attr = nla_reserve(f_skb, DP_GENL_A_OPENFLOW, opi_len);
        if (!attr)
                goto error_free_f_skb;
        opi = nla_data(attr);
        opi->header.version = OFP_VERSION;
        opi->header.type    = OFPT_PACKET_IN;
        opi->header.length  = htons(opi_len);
        opi->header.xid     = htonl(0);

        opi->buffer_id      = htonl(buffer_id);
        opi->total_len      = htons(skb->len);
        /* NOTE(review): assumes skb->dev->br_port is non-NULL, i.e. the skb
         * entered through one of our switch ports -- confirm for all
         * callers. */
        opi->in_port        = htons(skb->dev->br_port->port_no);
        opi->reason         = reason;
        opi->pad            = 0;
        SKB_LINEAR_ASSERT(skb);
        memcpy(opi->data, skb_mac_header(skb), fwd_len);

        err = genlmsg_end(f_skb, data);
        if (err < 0)
                goto error_free_f_skb;

        err = genlmsg_multicast(f_skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_output_control: genlmsg_multicast failed: %d\n", err);

        /* We own 'skb'; its contents have been copied, so release it. */
        kfree_skb(skb);

        return err;

nla_put_failure:
error_free_f_skb:
        nlmsg_free(f_skb);
error_free_skb:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_output_control: failed to send: %d\n", err);
        return err;
}
573
/* Fills 'desc' with a description of port 'p' in OpenFlow wire format
 * (multi-byte fields in network byte order). */
static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
{
        desc->port_no = htons(p->port_no);
        /* strncpy() does not guarantee termination, so force it below. */
        strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
        desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
        memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
        desc->flags = htonl(p->flags);
        desc->features = 0;
        desc->speed = 0;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
        /* Query link capabilities via ethtool, when the driver provides
         * get_settings, and translate SUPPORTED_* bits to OFPPF_* bits. */
        if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
                struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };

                if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
                        if (ecmd.supported & SUPPORTED_10baseT_Half)
                                desc->features |= OFPPF_10MB_HD;
                        if (ecmd.supported & SUPPORTED_10baseT_Full)
                                desc->features |= OFPPF_10MB_FD;
                        if (ecmd.supported & SUPPORTED_100baseT_Half)
                                desc->features |= OFPPF_100MB_HD;
                        if (ecmd.supported & SUPPORTED_100baseT_Full)
                                desc->features |= OFPPF_100MB_FD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Half)
                                desc->features |= OFPPF_1GB_HD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Full)
                                desc->features |= OFPPF_1GB_FD;
                        /* 10Gbps half-duplex doesn't exist... */
                        if (ecmd.supported & SUPPORTED_10000baseT_Full)
                                desc->features |= OFPPF_10GB_FD;

                        desc->features = htonl(desc->features);
                        desc->speed = htonl(ecmd.speed);
                }
        }
#endif
}
611
/* Fills 'odh' with this datapath's OFPT_DATA_HELLO body, including one
 * ofp_phy_port entry per port.  Returns the number of ports written.  The
 * caller must have reserved room for up to OFPP_MAX ports (see
 * dp_send_hello) and must set odh->header.length afterwards. */
static int
fill_data_hello(struct datapath *dp, struct ofp_data_hello *odh)
{
        struct net_bridge_port *p;
        int port_count = 0;

        odh->header.version = OFP_VERSION;
        odh->header.type    = OFPT_DATA_HELLO;
        odh->header.xid     = htonl(0);
        odh->datapath_id    = cpu_to_be64(dp->id);

        /* Table capacities advertised to the controller. */
        odh->n_exact        = htonl(2 * TABLE_HASH_MAX_FLOWS);
        odh->n_mac_only     = htonl(TABLE_MAC_MAX_FLOWS);
        odh->n_compression  = 0;                                           /* Not supported */
        odh->n_general      = htonl(TABLE_LINEAR_MAX_FLOWS);
        odh->buffer_mb      = htonl(UINT32_MAX);
        odh->n_buffers      = htonl(N_PKT_BUFFERS);
        odh->capabilities   = htonl(OFP_SUPPORTED_CAPABILITIES);
        odh->actions        = htonl(OFP_SUPPORTED_ACTIONS);
        odh->miss_send_len  = htons(dp->miss_send_len);

        list_for_each_entry_rcu (p, &dp->port_list, node) {
                fill_port_desc(p, &odh->ports[port_count]);
                port_count++;
        }

        return port_count;
}
640
/* Multicasts an OFPT_DATA_HELLO describing 'dp' to the control path.
 * Reserves space for the maximum possible number of ports, then gives back
 * the unused space with nla_unreserve() once the real count is known.
 * Returns 0 on success or a negative errno. */
int
dp_send_hello(struct datapath *dp)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_data_hello *odh;
        size_t odh_max_len, odh_len, port_max_len, len;
        void *data;
        int err = -ENOMEM;
        int port_count;


        /* Overallocate, since we can't reliably determine the number of
         * ports a priori. */
        port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;

        len = nla_total_size(sizeof(*odh) + port_max_len)
                                + nla_total_size(sizeof(uint32_t));

        skb = genlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_put failed\n");
                goto error;
        }

        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        /* Reserve for the worst case; trimmed below. */
        odh_max_len = sizeof(*odh) + port_max_len;
        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, odh_max_len);
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_hello: nla_reserve failed\n");
                goto error;
        }
        odh = nla_data(attr);
        port_count = fill_data_hello(dp, odh);

        /* Only now that we know how many ports we've added can we say
         * say something about the length. */
        odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
        odh->header.length = htons(odh_len);

        /* Take back the unused part that was reserved */
        nla_unreserve(skb, attr, (odh_max_len - odh_len));

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_send_hello: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_send_hello: failed to send: %d\n", err);
        return err;
}
715
716 int
717 dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
718 {
719         struct net_bridge_port *p;
720
721         p = dp->ports[htons(opp->port_no)];
722
723         /* Make sure the port id hasn't changed since this was sent */
724         if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN) != 0) 
725                 return -1;
726         
727         p->flags = htonl(opp->flags);
728
729         return 0;
730 }
731
732
/* Multicasts an OFPT_PORT_STATUS message for port 'p' to the control path;
 * 'status' is the OFPPR_* reason (add/delete/mod).  Returns 0 on success or
 * a negative errno. */
static int
send_port_status(struct net_bridge_port *p, uint8_t status)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_port_status *ops;
        void *data;
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_put failed\n");
                goto error;
        }

        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, p->dp->dp_idx);

        /* Fixed-size message: reserve exactly sizeof(*ops). */
        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ops));
        if (!attr) {
                if (net_ratelimit())
                        printk("send_port_status: nla_reserve failed\n");
                goto error;
        }

        ops = nla_data(attr);
        ops->header.version = OFP_VERSION;
        ops->header.type    = OFPT_PORT_STATUS;
        ops->header.length  = htons(sizeof(*ops));
        ops->header.xid     = htonl(0);

        ops->reason         = status;
        fill_port_desc(p, &ops->desc);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_port_status: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "send_port_status: failed to send: %d\n", err);
        return err;
}
796
/* Multicasts an OFPT_FLOW_EXPIRED message for 'flow' to the control path.
 * Returns 0 on success or a negative errno. */
int
dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_flow_expired *ofe;
        void *data;
        unsigned long duration_j;
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_put failed\n");
                goto error;
        }

        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ofe));
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: nla_reserve failed\n");
                goto error;
        }

        ofe = nla_data(attr);
        ofe->header.version = OFP_VERSION;
        ofe->header.type    = OFPT_FLOW_EXPIRED;
        ofe->header.length  = htons(sizeof(*ofe));
        ofe->header.xid     = htonl(0);

        flow_fill_match(&ofe->match, &flow->key);
        /* NOTE(review): lifetime in jiffies is derived as
         * (timeout - HZ * max_idle) - init_time, i.e. the last-use time
         * minus creation time, presumably because 'timeout' is last use
         * plus the idle allowance -- confirm against the flow code. */
        duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
        ofe->duration   = htonl(duration_j / HZ);
        ofe->packet_count   = cpu_to_be64(flow->packet_count);
        ofe->byte_count     = cpu_to_be64(flow->byte_count);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_flow_expired: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "send_flow_expired: failed to send: %d\n", err);
        return err;
}
864
865 /* Generic Netlink interface.
866  *
867  * See netlink(7) for an introduction to netlink.  See
868  * http://linux-net.osdl.org/index.php/Netlink for more information and
869  * pointers on how to work with netlink and Generic Netlink in the kernel and
870  * in userspace. */
871
/* Generic Netlink family definition for the datapath control interface. */
static struct genl_family dp_genl_family = {
        .id = GENL_ID_GENERATE,         /* Let genetlink assign a free id. */
        .hdrsize = 0,                   /* No family-specific header. */
        .name = DP_GENL_FAMILY_NAME,
        .version = 1,
        .maxattr = DP_GENL_A_MAX,
};
879
/* Attribute policy: what each attribute may contain.  */
static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },       /* Datapath index. */
        [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },     /* Multicast group id. */
        [DP_GENL_A_PORTNAME] = { .type = NLA_STRING }   /* Net device name. */
};
886
887 static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
888 {
889         if (!info->attrs[DP_GENL_A_DP_IDX])
890                 return -EINVAL;
891
892         return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
893 }
894
/* Registration record binding DP_GENL_C_ADD_DP to dp_genl_add(). */
static struct genl_ops dp_genl_ops_add_dp = {
        .cmd = DP_GENL_C_ADD_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_add,
        .dumpit = NULL,
};
902
903 struct datapath *dp_get(int dp_idx)
904 {
905         if (dp_idx < 0 || dp_idx > DP_MAX)
906                 return NULL;
907         return rcu_dereference(dps[dp_idx]);
908 }
909
910 static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
911 {
912         struct datapath *dp;
913         int err;
914
915         if (!info->attrs[DP_GENL_A_DP_IDX])
916                 return -EINVAL;
917
918         mutex_lock(&dp_mutex);
919         dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
920         if (!dp)
921                 err = -ENOENT;
922         else {
923                 del_dp(dp);
924                 err = 0;
925         }
926         mutex_unlock(&dp_mutex);
927         return err;
928 }
929
/* Operation descriptor for DP_GENL_C_DEL_DP (destroy datapath). */
static struct genl_ops dp_genl_ops_del_dp = {
        .cmd = DP_GENL_C_DEL_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_del,
        .dumpit = NULL,
};
937
938 /* Queries a datapath for related information.  Currently the only relevant
939  * information is the datapath's multicast group ID.  Really we want one
940  * multicast group per datapath, but because of locking issues[*] we can't
941  * easily get one.  Thus, every datapath will currently return the same
942  * global multicast group ID, but in the future it would be nice to fix that.
943  *
944  * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
945  *       mutex, and genl_register_mc_group, called to acquire a new multicast
946  *       group ID, also acquires genl_lock, thus deadlock.
947  */
948 static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
949 {
950         struct datapath *dp;
951         struct sk_buff *ans_skb = NULL;
952         int dp_idx;
953         int err = -ENOMEM;
954
955         if (!info->attrs[DP_GENL_A_DP_IDX])
956                 return -EINVAL;
957
958         rcu_read_lock();
959         dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]));
960         dp = dp_get(dp_idx);
961         if (!dp)
962                 err = -ENOENT;
963         else {
964                 void *data;
965                 ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
966                 if (!ans_skb) {
967                         err = -ENOMEM;
968                         goto err;
969                 }
970                 data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
971                                          0, DP_GENL_C_QUERY_DP);
972                 if (data == NULL) {
973                         err = -ENOMEM;
974                         goto err;
975                 }
976                 NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx);
977                 NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
978
979                 genlmsg_end(ans_skb, data);
980                 err = genlmsg_reply(ans_skb, info);
981                 if (!err)
982                         ans_skb = NULL;
983         }
984 err:
985 nla_put_failure:
986         if (ans_skb)
987                 kfree_skb(ans_skb);
988         rcu_read_unlock();
989         return err;
990 }
991
/*
 * Fill one flow entry of a netlink flow-query reply from the table
 * iterator's current flow.  Called with rcu_read_lock held.
 *
 * NOTE(review): only match.wildcards is byte-swapped here; the other
 * match fields appear to be kept in network byte order in the flow key
 * already -- confirm against flow_extract()/flow.h.
 */
static
int
dp_fill_flow(struct ofp_flow_mod* ofm, struct swt_iterator* iter)
{
        /* Fixed OpenFlow header; length covers the flow_mod plus the
         * single action copied below. */
        ofm->header.version  = OFP_VERSION;
        ofm->header.type     = OFPT_FLOW_MOD;
        ofm->header.length   = htons(sizeof(struct ofp_flow_mod)
                                + sizeof(ofm->actions[0]));
        ofm->header.xid      = htonl(0);

        /* Copy the match, group, and idle-timeout fields verbatim. */
        ofm->match.wildcards = htons(iter->flow->key.wildcards);
        ofm->match.in_port   = iter->flow->key.in_port;
        ofm->match.dl_vlan   = iter->flow->key.dl_vlan;
        memcpy(ofm->match.dl_src, iter->flow->key.dl_src, ETH_ALEN);
        memcpy(ofm->match.dl_dst, iter->flow->key.dl_dst, ETH_ALEN);
        ofm->match.dl_type   = iter->flow->key.dl_type;
        ofm->match.nw_src    = iter->flow->key.nw_src;
        ofm->match.nw_dst    = iter->flow->key.nw_dst;
        ofm->match.nw_proto  = iter->flow->key.nw_proto;
        ofm->match.tp_src    = iter->flow->key.tp_src;
        ofm->match.tp_dst    = iter->flow->key.tp_dst;
        ofm->group_id        = iter->flow->group_id;
        ofm->max_idle        = iter->flow->max_idle;
        /* TODO support multiple actions  */
        ofm->actions[0]      = iter->flow->actions[0];

        return 0;
}
1024
1025 static int dp_genl_show(struct sk_buff *skb, struct genl_info *info)
1026 {
1027         struct datapath *dp;
1028         int err = -ENOMEM;
1029         struct sk_buff *ans_skb = NULL;
1030         void *data;
1031         struct nlattr *attr;
1032         struct ofp_data_hello *odh;
1033         size_t odh_max_len, odh_len, port_max_len, len;
1034         int port_count;
1035
1036         if (!info->attrs[DP_GENL_A_DP_IDX])
1037                 return -EINVAL;
1038
1039         mutex_lock(&dp_mutex);
1040         dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
1041         if (!dp)
1042                 goto error;
1043
1044         /* Overallocate, since we can't reliably determine the number of
1045          * ports a priori. */
1046         port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;
1047
1048         len = nla_total_size(sizeof(*odh) + port_max_len)
1049                         + nla_total_size(sizeof(uint32_t));
1050
1051         ans_skb = nlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_KERNEL);
1052         if (!ans_skb)
1053                 goto error;
1054
1055         data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
1056                                  0, DP_GENL_C_SHOW_DP);
1057         if (data == NULL) 
1058                 goto error;
1059
1060         NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
1061
1062         odh_max_len = sizeof(*odh) + port_max_len;
1063         attr = nla_reserve(ans_skb, DP_GENL_A_DP_INFO, odh_max_len);
1064         if (!attr)
1065                 goto error;
1066         odh = nla_data(attr);
1067         port_count = fill_data_hello(dp, odh);
1068
1069         /* Only now that we know how many ports we've added can we say
1070          * say something about the length. */
1071         odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
1072         odh->header.length = htons(odh_len);
1073
1074         /* Take back the unused part that was reserved */
1075         nla_unreserve(ans_skb, attr, (odh_max_len - odh_len));
1076
1077         genlmsg_end(ans_skb, data);
1078         err = genlmsg_reply(ans_skb, info);
1079         if (!err)
1080                 ans_skb = NULL;
1081
1082 error:
1083 nla_put_failure:
1084         if (ans_skb)
1085                 kfree_skb(ans_skb);
1086         mutex_unlock(&dp_mutex);
1087         return err;
1088 }
1089
/* Operation descriptor for DP_GENL_C_SHOW_DP (datapath/port info). */
static struct genl_ops dp_genl_ops_show_dp = {
        .cmd = DP_GENL_C_SHOW_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_show,
        .dumpit = NULL,
};
1097
/* Convenience function: starts a DP_GENL_C_QUERY_FLOW reply on 'skb' and
 * adds the datapath-index and table-index attributes.  Returns the genl
 * message header pointer (to be passed to genlmsg_end()) or NULL on
 * failure; on failure the skb may hold a partial message, which the
 * caller is responsible for freeing. */
static
void*
dp_init_nl_flow_msg(uint32_t dp_idx, uint16_t table_idx,
                struct genl_info *info, struct sk_buff* skb)
{
        void* data;

        data = genlmsg_put_reply(skb, info, &dp_genl_family, 0,
                                DP_GENL_C_QUERY_FLOW);
        if (data == NULL)
                return NULL;
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,   dp_idx);
        NLA_PUT_U16(skb, DP_GENL_A_TABLEIDX, table_idx);

        return data;

        /* NLA_PUT_* jumps here when the skb runs out of tailroom. */
nla_put_failure:
        return NULL;
}
1118
1119 /*  Iterate through the specified table and send all flow entries over
1120  *  netlink to userspace.  Each flow message has the following format:
1121  *
1122  *  32bit dpix
1123  *  16bit tabletype
1124  *  32bit number of flows
1125  *  openflow-flow-entries
1126  *
 *  The full table may require multiple messages.  A message with 0 flows
 *  signifies the end of the message sequence.
1129  */
1130
1131 static 
1132 int 
1133 dp_dump_table(struct datapath *dp, uint16_t table_idx, struct genl_info *info, struct ofp_flow_mod* matchme) 
1134
1135         struct sk_buff  *skb = 0; 
1136         struct sw_table *table = 0;
1137         struct swt_iterator iter;
1138         struct sw_flow_key in_flow; 
1139         struct nlattr   *attr;
1140         int count = 0, sum_count = 0;
1141         void *data; 
1142         uint8_t* ofm_ptr = 0;
1143         struct nlattr   *num_attr; 
1144         int err = -ENOMEM;
1145
1146         table = dp->chain->tables[table_idx]; 
1147         if ( table == NULL ) {
1148                 dprintk("dp::dp_dump_table error, non-existant table at position %d\n", table_idx);
1149                 return -EINVAL;
1150         }
1151
1152         if (!table->iterator(table, &iter)) {
1153                 dprintk("dp::dp_dump_table couldn't initialize empty table iterator\n");
1154                 return -ENOMEM;
1155         }
1156
1157         while (iter.flow) {
1158
1159                 /* verify that we can fit all NL_FLOWS_PER_MESSAGE in a single
1160                  * sk_buf */
1161                 if( (sizeof(dp_genl_family) + sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + 
1162                                         (NL_FLOWS_PER_MESSAGE * sizeof(struct ofp_flow_mod))) > (8192 - 64)){
1163                         dprintk("dp::dp_dump_table NL_FLOWS_PER_MESSAGE may cause overrun in skbuf\n");
1164                         return -ENOMEM;
1165                 }
1166
1167                 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
1168                 if (skb == NULL) {
1169                         return -ENOMEM;
1170                 }
1171
1172                 data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
1173                 if (data == NULL){
1174                         err= -ENOMEM;   
1175                         goto error_free_skb;
1176                 } 
1177
1178                 /* reserve space to put the number of flows for this message, to
1179                  * be filled after the loop*/
1180                 num_attr = nla_reserve(skb, DP_GENL_A_NUMFLOWS, sizeof(uint32_t));
1181                 if(!num_attr){
1182                         err = -ENOMEM;
1183                         goto error_free_skb;
1184                 }
1185
1186                 /* Only load NL_FLOWS_PER_MESSAGE flows at a time */
1187                 attr = nla_reserve(skb, DP_GENL_A_FLOW, 
1188                                 (sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action)) * NL_FLOWS_PER_MESSAGE);
1189                 if (!attr){
1190                         err = -ENOMEM;
1191                         goto error_free_skb;
1192                 }
1193
1194                 /* internal loop to fill NL_FLOWS_PER_MESSAGE flows */
1195                 ofm_ptr = nla_data(attr);
1196                 flow_extract_match(&in_flow, &matchme->match);
1197                 while (iter.flow && count < NL_FLOWS_PER_MESSAGE) {
1198                         if(flow_matches(&in_flow, &iter.flow->key)){
1199                                 if((err = dp_fill_flow((struct ofp_flow_mod*)ofm_ptr, &iter))) 
1200                                         goto error_free_skb;
1201                                 count++; 
1202                                 /* TODO support multiple actions  */
1203                                 ofm_ptr += sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action);
1204                         }
1205                         table->iterator_next(&iter);
1206                 }
1207
1208                 *((uint32_t*)nla_data(num_attr)) = count;
1209                 genlmsg_end(skb, data); 
1210
1211                 sum_count += count; 
1212                 count = 0;
1213
1214                 err = genlmsg_unicast(skb, info->snd_pid); 
1215                 skb = 0;
1216         }
1217
1218         /* send a sentinal message saying we're done */
1219         skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
1220         if (skb == NULL) {
1221                 return -ENOMEM;
1222         }
1223         data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
1224         if (data == NULL){
1225                 err= -ENOMEM;   
1226                 goto error_free_skb;
1227         } 
1228
1229         NLA_PUT_U32(skb, DP_GENL_A_NUMFLOWS,   0);
1230         /* dummy flow so nl doesn't complain */
1231         attr = nla_reserve(skb, DP_GENL_A_FLOW, sizeof(struct ofp_flow_mod));
1232         if (!attr){
1233                 err = -ENOMEM;
1234                 goto error_free_skb;
1235         }
1236         genlmsg_end(skb, data); 
1237         err = genlmsg_reply(skb, info); skb = 0;
1238
1239 nla_put_failure:
1240 error_free_skb:
1241         if(skb)
1242                 kfree_skb(skb);
1243         return err;
1244 }
1245
1246 /* Helper function to query_table which creates and sends a message packed with
1247  * table stats.  Message form is:
1248  *
1249  * u32 DP_IDX
1250  * u32 NUM_TABLES
1251  * OFP_TABLE (list of OFP_TABLES)
1252  *
1253  */
1254
1255 static 
1256 int 
1257 dp_dump_table_stats(struct datapath *dp, int dp_idx, struct genl_info *info) 
1258
1259         struct sk_buff   *skb = 0; 
1260         struct ofp_table *ot = 0;
1261         struct nlattr   *attr;
1262         struct sw_table_stats stats; 
1263         size_t len;
1264         void *data; 
1265         int err = -ENOMEM;
1266         int i = 0;
1267         int nt = dp->chain->n_tables;
1268
1269         len = 4 + 4 + (sizeof(struct ofp_table) * nt);
1270
1271         /* u32 IDX, u32 NUMTABLES, list-of-tables */
1272         skb = nlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
1273         if (skb == NULL) {
1274                 return -ENOMEM;
1275         }
1276         
1277         data = genlmsg_put_reply(skb, info, &dp_genl_family, 0, 
1278                                 DP_GENL_C_QUERY_TABLE);
1279         if (data == NULL){
1280                 return -ENOMEM;
1281         } 
1282
1283         NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,      dp_idx);
1284         NLA_PUT_U32(skb, DP_GENL_A_NUMTABLES, nt);
1285
1286         /* ... we assume that all tables can fit in a single message.
1287          * Probably a reasonable assumption seeing that we only have
1288          * 3 atm */
1289         attr = nla_reserve(skb, DP_GENL_A_TABLE, (sizeof(struct ofp_table) * nt));
1290         if (!attr){
1291                 err = -ENOMEM;
1292                 goto error_free_skb;
1293         }
1294
1295         ot = nla_data(attr);
1296
1297         for (i = 0; i < nt; ++i) {
1298                 dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
1299                 ot->header.version = OFP_VERSION;
1300                 ot->header.type    = OFPT_TABLE;
1301                 ot->header.length  = htons(sizeof(struct ofp_table));
1302                 ot->header.xid     = htonl(0);
1303
1304                 strncpy(ot->name, stats.name, OFP_MAX_TABLE_NAME_LEN); 
1305                 ot->table_id  = htons(i);
1306                 ot->n_flows   = htonl(stats.n_flows);
1307                 ot->max_flows = htonl(stats.max_flows);
1308                 ot++;
1309         }
1310
1311         genlmsg_end(skb, data); 
1312         err = genlmsg_reply(skb, info); skb = 0;
1313
1314 nla_put_failure:
1315 error_free_skb:
1316         if(skb)
1317                 kfree_skb(skb);
1318         return err;
1319 }
1320
1321 /* 
1322  * Queries a datapath for flow-table statistics 
1323  */
1324
1325
1326 static int dp_genl_table_query(struct sk_buff *skb, struct genl_info *info)
1327 {
1328         struct   datapath* dp;
1329         int       err = 0;
1330
1331         if (!info->attrs[DP_GENL_A_DP_IDX]) {
1332                 dprintk("dp::dp_genl_table_query received message with missing attributes\n");
1333                 return -EINVAL;
1334         }
1335
1336         rcu_read_lock();
1337         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1338         if (!dp) {
1339                 err = -ENOENT;
1340                 goto err_out;
1341         }
1342
1343         err = dp_dump_table_stats(dp, nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]), info); 
1344
1345 err_out:
1346         rcu_read_unlock();
1347         return err;
1348 }
1349
1350 /* 
1351  * Queries a datapath for flow-table entries.
1352  */
1353
1354 static int dp_genl_flow_query(struct sk_buff *skb, struct genl_info *info)
1355 {
1356         struct datapath* dp;
1357         struct ofp_flow_mod*  ofm;
1358         u16     table_idx;
1359         int     err = 0;
1360
1361         if (!info->attrs[DP_GENL_A_DP_IDX]
1362                                 || !info->attrs[DP_GENL_A_TABLEIDX]
1363                                 || !info->attrs[DP_GENL_A_FLOW]) {
1364                 dprintk("dp::dp_genl_flow_query received message with missing attributes\n");
1365                 return -EINVAL;
1366         }
1367
1368         rcu_read_lock();
1369         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1370         if (!dp) {
1371                 err = -ENOENT;
1372                 goto err_out;
1373         }
1374
1375         table_idx = nla_get_u16(info->attrs[DP_GENL_A_TABLEIDX]);
1376
1377         if (dp->chain->n_tables <= table_idx){
1378                 printk("table index %d invalid (dp has %d tables)\n",
1379                                 table_idx, dp->chain->n_tables);
1380         err = -EINVAL;
1381                 goto err_out;
1382         }
1383
1384         ofm = nla_data(info->attrs[DP_GENL_A_FLOW]);
1385         err = dp_dump_table(dp, table_idx, info, ofm); 
1386
1387 err_out:
1388         rcu_read_unlock();
1389         return err;
1390 }
1391
/* Attribute policy for flow queries.
 * NOTE(review): DP_GENL_A_FLOW has no entry here, so its payload arrives
 * unvalidated -- confirm that dp_genl_flow_query's length assumptions on
 * the ofp_flow_mod hold for short attributes. */
static struct nla_policy dp_genl_flow_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
        [DP_GENL_A_TABLEIDX] = { .type = NLA_U16 },
        [DP_GENL_A_NUMFLOWS]  = { .type = NLA_U32 },
};
1397
/* Operation descriptor for DP_GENL_C_QUERY_FLOW (dump matching flows). */
static struct genl_ops dp_genl_ops_query_flow = {
        .cmd    = DP_GENL_C_QUERY_FLOW,
        .flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_flow_policy,
        .doit   = dp_genl_flow_query,
        .dumpit = NULL,
};
1405
/* Attribute policy for table-statistics queries. */
static struct nla_policy dp_genl_table_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
};
1409
/* Operation descriptor for DP_GENL_C_QUERY_TABLE (table statistics). */
static struct genl_ops dp_genl_ops_query_table = {
        .cmd    = DP_GENL_C_QUERY_TABLE,
        .flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_table_policy,
        .doit   = dp_genl_table_query,
        .dumpit = NULL,
};
1417
1418
/* Operation descriptor for DP_GENL_C_QUERY_DP (multicast group query). */
static struct genl_ops dp_genl_ops_query_dp = {
        .cmd = DP_GENL_C_QUERY_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_query,
        .dumpit = NULL,
};
1426
1427 static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
1428 {
1429         struct datapath *dp;
1430         struct net_device *port;
1431         int err;
1432
1433         if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_PORTNAME])
1434                 return -EINVAL;
1435
1436         /* Get datapath. */
1437         mutex_lock(&dp_mutex);
1438         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1439         if (!dp) {
1440                 err = -ENOENT;
1441                 goto out;
1442         }
1443
1444         /* Get interface to add/remove. */
1445         port = dev_get_by_name(&init_net, 
1446                         nla_data(info->attrs[DP_GENL_A_PORTNAME]));
1447         if (!port) {
1448                 err = -ENOENT;
1449                 goto out;
1450         }
1451
1452         /* Execute operation. */
1453         if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
1454                 err = add_switch_port(dp, port);
1455         else {
1456                 if (port->br_port == NULL || port->br_port->dp != dp) {
1457                         err = -ENOENT;
1458                         goto out_put;
1459                 }
1460                 err = del_switch_port(port->br_port);
1461         }
1462
1463 out_put:
1464         dev_put(port);
1465 out:
1466         mutex_unlock(&dp_mutex);
1467         return err;
1468 }
1469
/* Operation descriptor for DP_GENL_C_ADD_PORT (attach a net_device). */
static struct genl_ops dp_genl_ops_add_port = {
        .cmd = DP_GENL_C_ADD_PORT,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_add_del_port,
        .dumpit = NULL,
};
1477
/* Operation descriptor for DP_GENL_C_DEL_PORT (detach a net_device). */
static struct genl_ops dp_genl_ops_del_port = {
        .cmd = DP_GENL_C_DEL_PORT,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_add_del_port,
        .dumpit = NULL,
};
1485
1486 static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
1487 {
1488         struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
1489         struct datapath *dp;
1490         int err;
1491
1492         if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
1493                 return -EINVAL;
1494
1495         rcu_read_lock();
1496         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1497         if (!dp) {
1498                 err = -ENOENT;
1499                 goto out;
1500         }
1501
1502         va = info->attrs[DP_GENL_A_OPENFLOW];
1503
1504         err = fwd_control_input(dp->chain, nla_data(va), nla_len(va));
1505
1506 out:
1507         rcu_read_unlock();
1508         return err;
1509 }
1510
/* Attribute policy for OpenFlow control messages.
 * NOTE(review): DP_GENL_A_OPENFLOW itself has no entry, so the payload
 * is unvalidated by netlink -- fwd_control_input must bound-check it. */
static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
};
1514
/* Operation descriptor for DP_GENL_C_OPENFLOW (control message input). */
static struct genl_ops dp_genl_ops_openflow = {
        .cmd = DP_GENL_C_OPENFLOW,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_openflow_policy,
        .doit = dp_genl_openflow,
        .dumpit = NULL,
};
1522
/* Attribute policy for the netlink benchmark command. */
static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
        [DP_GENL_A_NPACKETS] = { .type = NLA_U32 }, /* packets to send */
        [DP_GENL_A_PSIZE] = { .type = NLA_U32 },    /* bytes per packet */
};
1528
/* Operation descriptor for DP_GENL_C_BENCHMARK_NL (netlink throughput
 * benchmark; handler defined elsewhere in this file). */
static struct genl_ops dp_genl_ops_benchmark_nl = {
        .cmd = DP_GENL_C_BENCHMARK_NL,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_benchmark_policy,
        .doit = dp_genl_benchmark_nl,
        .dumpit = NULL,
};
1536
/* All operations registered with the genl family by dp_init_netlink(). */
static struct genl_ops *dp_genl_all_ops[] = {
        /* Keep this operation first.  Generic Netlink dispatching
         * looks up operations with linear search, so we want it at the
         * front. */
        &dp_genl_ops_openflow,

        &dp_genl_ops_query_flow,
        &dp_genl_ops_query_table,
        &dp_genl_ops_show_dp,
        &dp_genl_ops_add_dp,
        &dp_genl_ops_del_dp,
        &dp_genl_ops_query_dp,
        &dp_genl_ops_add_port,
        &dp_genl_ops_del_port,
        &dp_genl_ops_benchmark_nl,
};
1553
1554 static int dp_init_netlink(void)
1555 {
1556         int err;
1557         int i;
1558
1559         err = genl_register_family(&dp_genl_family);
1560         if (err)
1561                 return err;
1562
1563         for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
1564                 err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
1565                 if (err)
1566                         goto err_unregister;
1567         }
1568
1569         strcpy(mc_group.name, "openflow");
1570         err = genl_register_mc_group(&dp_genl_family, &mc_group);
1571         if (err < 0)
1572                 goto err_unregister;
1573
1574         return 0;
1575
1576 err_unregister:
1577         genl_unregister_family(&dp_genl_family);
1578                 return err;
1579 }
1580
/* Unregisters the genl family; this also drops its ops and mc group. */
static void dp_uninit_netlink(void)
{
        genl_unregister_family(&dp_genl_family);
}
1585
/* Module identification strings used in the banner printed at load time. */
#define DRV_NAME                "openflow"
#define DRV_VERSION      VERSION
#define DRV_DESCRIPTION "OpenFlow switching datapath implementation"
#define DRV_COPYRIGHT   "Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"
1590
1591
1592 static int __init dp_init(void)
1593 {
1594         int err;
1595
1596         printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
1597         printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n");
1598         printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n");
1599
1600         err = flow_init();
1601         if (err)
1602                 goto error;
1603
1604         err = dp_init_netlink();
1605         if (err)
1606                 goto error_flow_exit;
1607
1608         /* Hook into callback used by the bridge to intercept packets.
1609          * Parasites we are. */
1610         if (br_handle_frame_hook)
1611                 printk("openflow: hijacking bridge hook\n");
1612         br_handle_frame_hook = dp_frame_hook;
1613
1614         return 0;
1615
1616 error_flow_exit:
1617         flow_exit();
1618 error:
1619         printk(KERN_EMERG "openflow: failed to install!");
1620         return err;
1621 }
1622
/* Module unload: tears down forwarding, netlink, and the flow subsystem,
 * and returns the bridge frame hook to its default (NULL) state. */
static void dp_cleanup(void)
{
        fwd_exit();
        dp_uninit_netlink();
        flow_exit();
        br_handle_frame_hook = NULL;
}
1630
/* Standard kernel module entry/exit points and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");