Add support for OFPP_TABLE virtual port.
[sliver-openvswitch.git] / datapath / datapath.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 /* Functions for managing the dp interface/device. */
8
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/if_arp.h>
12 #include <linux/if_bridge.h>
13 #include <linux/if_vlan.h>
14 #include <linux/in.h>
15 #include <net/genetlink.h>
16 #include <linux/ip.h>
17 #include <linux/delay.h>
18 #include <linux/etherdevice.h>
19 #include <linux/kernel.h>
20 #include <linux/kthread.h>
21 #include <linux/mutex.h>
22 #include <linux/rtnetlink.h>
23 #include <linux/rcupdate.h>
24 #include <linux/version.h>
25 #include <linux/ethtool.h>
26 #include <linux/random.h>
27 #include <asm/system.h>
28 #include <linux/netfilter_bridge.h>
29 #include <linux/inetdevice.h>
30 #include <linux/list.h>
31
32 #include "openflow-netlink.h"
33 #include "datapath.h"
34 #include "table.h"
35 #include "chain.h"
36 #include "forward.h"
37 #include "flow.h"
38 #include "datapath_t.h"
39
40 #include "compat.h"
41
42
43 /* Number of milliseconds between runs of the maintenance thread. */
44 #define MAINT_SLEEP_MSECS 1000
45
46 #define BRIDGE_PORT_NO_FLOOD    0x00000001 
47
48 #define UINT32_MAX                        4294967295U
49 #define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
50
/* One physical switch port attached to a datapath.  Reuses the name of the
 * kernel bridge's per-port structure because it is hung off
 * net_device->br_port (the datapath cannot coexist with the bridge module). */
struct net_bridge_port {
        u16     port_no;        /* OpenFlow port number, index into datapath.ports[]. */
        u32 flags;              /* Host byte order; e.g. BRIDGE_PORT_NO_FLOOD. */
        struct datapath *dp;    /* Owning datapath. */
        struct net_device *dev; /* Underlying net device; holds a dev_hold() reference. */
        struct list_head node; /* Element in datapath.ports. */
};
58
59 static struct genl_family dp_genl_family;
60 static struct genl_multicast_group mc_group;
61
62 int dp_dev_setup(struct net_device *dev);  
63
64 /* It's hard to imagine wanting more than one datapath, but... */
65 #define DP_MAX 32
66
67 /* datapaths.  Protected on the read side by rcu_read_lock, on the write side
68  * by dp_mutex.
69  *
70  * It is safe to access the datapath and net_bridge_port structures with just
71  * the dp_mutex, but to access the chain you need to take the rcu_read_lock
72  * also (because dp_mutex doesn't prevent flows from being destroyed).
73  */
74 static struct datapath *dps[DP_MAX];
75 static DEFINE_MUTEX(dp_mutex);
76
77 static int dp_maint_func(void *data);
78 static int send_port_status(struct net_bridge_port *p, uint8_t status);
79
80
/* nla_unreserve - reduce amount of space reserved by nla_reserve
 * @skb: socket buffer from which to recover room
 * @nla: netlink attribute to adjust
 * @len: amount by which to reduce attribute payload
 *
 * Reduces amount of space reserved by a call to nla_reserve.
 *
 * No other attributes may be added between calling nla_reserve and this
 * function, since it will create a hole in the message.
 */
void nla_unreserve(struct sk_buff *skb, struct nlattr *nla, int len)
{
        /* Pull the skb's tail back so the unused reservation is no longer
         * part of the message payload.  NOTE(review): assumes 'nla' is the
         * last thing appended to 'skb' and that 'len' does not exceed the
         * attribute's payload — neither is checked here. */
        skb->tail -= len;
        skb->len  -= len;

        /* Shrink the attribute header's recorded length to match. */
        nla->nla_len -= len;
}
98
99 /* Generates a unique datapath id.  It incorporates the datapath index
100  * and a hardware address, if available.  If not, it generates a random
101  * one.
102  */
103 static 
104 uint64_t gen_datapath_id(uint16_t dp_idx)
105 {
106         uint64_t id;
107         int i;
108         struct net_device *dev;
109
110         /* The top 16 bits are used to identify the datapath.  The lower 48 bits
111          * use an interface address.  */
112         id = (uint64_t)dp_idx << 48;
113         if ((dev = dev_get_by_name(&init_net, "ctl0")) 
114                         || (dev = dev_get_by_name(&init_net, "eth0"))) {
115                 for (i=0; i<ETH_ALEN; i++) {
116                         id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
117                 }
118                 dev_put(dev);
119         } else {
120                 /* Randomly choose the lower 48 bits if we cannot find an
121                  * address and mark the most significant bit to indicate that
122                  * this was randomly generated. */
123                 uint8_t rand[ETH_ALEN];
124                 get_random_bytes(rand, ETH_ALEN);
125                 id |= (uint64_t)1 << 63;
126                 for (i=0; i<ETH_ALEN; i++) {
127                         id |= (uint64_t)rand[i] << (8*(ETH_ALEN-1 - i));
128                 }
129         }
130
131         return id;
132 }
133
134 /* Creates a new datapath numbered 'dp_idx'.  Returns 0 for success or a
135  * negative error code.
136  *
137  * Not called with any locks. */
138 static int new_dp(int dp_idx)
139 {
140         struct datapath *dp;
141         int err;
142
143         if (dp_idx < 0 || dp_idx >= DP_MAX)
144                 return -EINVAL;
145
146         if (!try_module_get(THIS_MODULE))
147                 return -ENODEV;
148
149         mutex_lock(&dp_mutex);
150         dp = rcu_dereference(dps[dp_idx]);
151         if (dp != NULL) {
152                 err = -EEXIST;
153                 goto err_unlock;
154         }
155
156         err = -ENOMEM;
157         dp = kzalloc(sizeof *dp, GFP_KERNEL);
158         if (dp == NULL)
159                 goto err_unlock;
160
161         dp->dp_idx = dp_idx;
162         dp->id = gen_datapath_id(dp_idx);
163         dp->chain = chain_create(dp);
164         if (dp->chain == NULL)
165                 goto err_free_dp;
166         INIT_LIST_HEAD(&dp->port_list);
167
168 #if 0
169         /* Setup our "of" device */
170         dp->dev.priv = dp;
171         rtnl_lock();
172         err = dp_dev_setup(&dp->dev);
173         rtnl_unlock();
174         if (err != 0) 
175                 printk("datapath: problem setting up 'of' device\n");
176 #endif
177
178         dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
179
180         dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx);
181         if (IS_ERR(dp->dp_task))
182                 goto err_free_dp;
183
184         rcu_assign_pointer(dps[dp_idx], dp);
185         mutex_unlock(&dp_mutex);
186
187         return 0;
188
189 err_free_dp:
190         kfree(dp);
191 err_unlock:
192         mutex_unlock(&dp_mutex);
193         module_put(THIS_MODULE);
194                 return err;
195 }
196
197 /* Find and return a free port number under 'dp'.  Called under dp_mutex. */
198 static int find_portno(struct datapath *dp)
199 {
200         int i;
201         for (i = 0; i < OFPP_MAX; i++)
202                 if (dp->ports[i] == NULL)
203                         return i;
204         return -EXFULL;
205 }
206
207 static struct net_bridge_port *new_nbp(struct datapath *dp,
208                                                                            struct net_device *dev)
209 {
210         struct net_bridge_port *p;
211         int port_no;
212
213         port_no = find_portno(dp);
214         if (port_no < 0)
215                 return ERR_PTR(port_no);
216
217         p = kzalloc(sizeof(*p), GFP_KERNEL);
218         if (p == NULL)
219                 return ERR_PTR(-ENOMEM);
220
221         p->dp = dp;
222         dev_hold(dev);
223         p->dev = dev;
224         p->port_no = port_no;
225
226         return p;
227 }
228
229 /* Called with dp_mutex. */
230 int add_switch_port(struct datapath *dp, struct net_device *dev)
231 {
232         struct net_bridge_port *p;
233
234         if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
235                 return -EINVAL;
236
237         if (dev->br_port != NULL)
238                 return -EBUSY;
239
240         p = new_nbp(dp, dev);
241         if (IS_ERR(p))
242                 return PTR_ERR(p);
243
244         dev_hold(dev);
245         rcu_assign_pointer(dev->br_port, p);
246         rtnl_lock();
247         dev_set_promiscuity(dev, 1);
248         rtnl_unlock();
249
250         rcu_assign_pointer(dp->ports[p->port_no], p);
251         list_add_rcu(&p->node, &dp->port_list);
252
253         /* Notify the ctlpath that this port has been added */
254         send_port_status(p, OFPPR_ADD);
255
256         return 0;
257 }
258
/* Delete 'p' from switch.
 * Called with dp_mutex. */
static int del_switch_port(struct net_bridge_port *p)
{
        /* First drop references to device. */
        rtnl_lock();
        dev_set_promiscuity(p->dev, -1);
        rtnl_unlock();
        /* Unpublish the port from every RCU-visible location: the
         * datapath's port list and table, and the device back-pointer. */
        list_del_rcu(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
        rcu_assign_pointer(p->dev->br_port, NULL);

        /* Then wait until no one is still using it, and destroy it.
         * After synchronize_rcu() no reader that found 'p' through the
         * pointers cleared above can still be running. */
        synchronize_rcu();

        /* Notify the ctlpath that this port no longer exists */
        send_port_status(p, OFPPR_DELETE);

        /* Drop the reference taken when the port was attached. */
        dev_put(p->dev);
        kfree(p);

        return 0;
}
282
/* Tears down datapath 'dp': stops its maintenance thread, detaches all of
 * its ports, unpublishes it, and frees it once no RCU reader can see it.
 * Called with dp_mutex. */
static void del_dp(struct datapath *dp)
{
        struct net_bridge_port *p, *n;

#if 0
        /* Unregister the "of" device of this dp */
        rtnl_lock();
        unregister_netdevice(&dp->dev);
        rtnl_unlock();
#endif

        /* Stop the maintenance thread before the chain it walks goes away. */
        kthread_stop(dp->dp_task);

        /* Drop references to DP. */
        list_for_each_entry_safe (p, n, &dp->port_list, node)
                del_switch_port(p);
        rcu_assign_pointer(dps[dp->dp_idx], NULL);

        /* Wait until no longer in use, then destroy it. */
        synchronize_rcu();
        chain_destroy(dp->chain);
        kfree(dp);
        /* Release the module reference taken in new_dp(). */
        module_put(THIS_MODULE);
}
308
309 static int dp_maint_func(void *data)
310 {
311         struct datapath *dp = (struct datapath *) data;
312
313         while (!kthread_should_stop()) {
314 #if 1
315                 chain_timeout(dp->chain);
316 #else
317                 int count = chain_timeout(dp->chain);
318                 chain_print_stats(dp->chain);
319                 if (count)
320                         printk("%d flows timed out\n", count);
321 #endif
322                 msleep_interruptible(MAINT_SLEEP_MSECS);
323         }
324                 
325         return 0;
326 }
327
328 /*
329  * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
330  * different set of devices!)  Returns 0 if *pskb should be processed further,
331  * 1 if *pskb is handled. */
332 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/* Called with rcu_read_lock. */
static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
                                         struct sk_buff *skb)
{
        struct ethhdr *eh = eth_hdr(skb);
        struct sk_buff *skb_local = NULL;


        /* Frames addressed to this port's own MAC are for the local host:
         * hand them straight back to the stack untouched. */
        if (compare_ether_addr(eh->h_dest, skb->dev->dev_addr) == 0)
                return skb;

        /* Broadcast/multicast/local frames are both switched AND delivered
         * locally, so keep a clone for the local stack.  NOTE(review): a
         * clone failure here silently skips local delivery (skb_local stays
         * NULL) but the frame is still forwarded. */
        if (is_broadcast_ether_addr(eh->h_dest)
                                || is_multicast_ether_addr(eh->h_dest)
                                || is_local_ether_addr(eh->h_dest))
                skb_local = skb_clone(skb, GFP_ATOMIC);

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        /* Hand the frame to the flow table; ownership of 'skb' passes here. */
        fwd_port_input(p->dp->chain, skb, p->port_no);

        /* Non-NULL return tells the caller to continue normal processing
         * with the (cloned) local copy. */
        return skb_local;
}
359 #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
360 static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
361 {
362         /* Push the Ethernet header back on. */
363         if ((*pskb)->protocol == htons(ETH_P_8021Q))
364                 skb_push(*pskb, VLAN_ETH_HLEN);
365         else
366                 skb_push(*pskb, ETH_HLEN);
367
368         fwd_port_input(p->dp->chain, *pskb, p->port_no);
369         return 1;
370 }
371 #else 
/* NB: This has only been tested on 2.4.35 */

/* Called without any locks (?) */
static void dp_frame_hook(struct sk_buff *skb)
{
        struct net_bridge_port *p = skb->dev->br_port;

        /* Push the Ethernet header back on. */
        if (skb->protocol == htons(ETH_P_8021Q))
                skb_push(skb, VLAN_ETH_HLEN);
        else
                skb_push(skb, ETH_HLEN);

        if (p) {
                /* Take the RCU read lock ourselves since, unlike the newer
                 * hooks, this entry point is not called under it. */
                rcu_read_lock();
                fwd_port_input(p->dp->chain, skb, p->port_no);
                rcu_read_unlock();
        } else
                /* Device is not (or no longer) a switch port: drop. */
                kfree_skb(skb);
}
392 #endif
393
394 /* Forwarding output path.
395  * Based on net/bridge/br_forward.c. */
396
397 /* Don't forward packets to originating port or with flooding disabled */
398 static inline int should_deliver(const struct net_bridge_port *p,
399                         const struct sk_buff *skb)
400 {
401         if ((skb->dev == p->dev) || (p->flags & BRIDGE_PORT_NO_FLOOD)) {
402                 return 0;
403         } 
404
405         return 1;
406 }
407
408 static inline unsigned packet_length(const struct sk_buff *skb)
409 {
410         int length = skb->len - ETH_HLEN;
411         if (skb->protocol == htons(ETH_P_8021Q))
412                 length -= VLAN_HLEN;
413         return length;
414 }
415
/* Sends a copy of 'skb' out every port that should_deliver() allows.
 * Takes ownership of 'skb'.  Returns 0 on success, -ENOMEM if a clone
 * failed (in which case 'skb' is freed and no further ports are tried).
 *
 * The prev_port trick saves one clone: each eligible port is transmitted
 * to only when the NEXT eligible port is found, so the final port gets the
 * original skb rather than a copy. */
static int
flood(struct datapath *dp, struct sk_buff *skb)
{
        struct net_bridge_port *p;
        int prev_port;

        prev_port = -1;
        list_for_each_entry_rcu (p, &dp->port_list, node) {
                if (!should_deliver(p, skb))
                        continue;
                if (prev_port != -1) {
                        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
                        if (!clone) {
                                kfree_skb(skb);
                                return -ENOMEM;
                        }
                        dp_output_port(dp, clone, prev_port);
                }
                prev_port = p->port_no;
        }
        /* Send the original to the last eligible port, or drop it if no
         * port qualified. */
        if (prev_port != -1)
                dp_output_port(dp, skb, prev_port);
        else
                kfree_skb(skb);

        return 0;
}
443
444 /* Marks 'skb' as having originated from 'in_port' in 'dp'.
445    FIXME: how are devices reference counted? */
446 int dp_set_origin(struct datapath *dp, uint16_t in_port,
447                            struct sk_buff *skb)
448 {
449         if (in_port < OFPP_MAX && dp->ports[in_port]) {
450                 skb->dev = dp->ports[in_port]->dev;
451                 return 0;
452         }
453         return -ENOENT;
454 }
455
/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
 * 'out_port' may be a physical port number or one of the OFPP_* virtual
 * ports: OFPP_FLOOD (replicate to all floodable ports), OFPP_CONTROLLER
 * (send up to the control path), or OFPP_TABLE (re-run the packet through
 * the flow table as if it had just arrived).
 */
int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
{
        struct net_bridge_port *p;
        int len = skb->len;

        BUG_ON(!skb);
        if (out_port == OFPP_FLOOD)
                return flood(dp, skb);
        else if (out_port == OFPP_CONTROLLER)
                return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
                                                  OFPR_ACTION);
        else if (out_port == OFPP_TABLE) {
                struct sw_flow_key key;
                struct sw_flow *flow;

                /* NOTE(review): assumes skb->dev is a switch port (non-NULL
                 * br_port) — true for packets that entered via a port, but
                 * worth confirming for controller-injected packets. */
                flow_extract(skb, skb->dev->br_port->port_no, &key);
                flow = chain_lookup(dp->chain, &key);
                if (likely(flow != NULL)) {
                        flow_used(flow, skb);
                        execute_actions(dp, skb, &key, flow->actions, flow->n_actions);
                        return 0;
                }
                /* No matching flow; NOTE(review): 'skb' does not appear to
                 * be freed on this path — possible leak. */
                return -ESRCH;
        } else if (out_port >= OFPP_MAX)
                goto bad_port;

        p = dp->ports[out_port];
        if (p == NULL)
                goto bad_port;

        skb->dev = p->dev;
        /* Drop rather than fragment packets larger than the egress MTU. */
        if (packet_length(skb) > skb->dev->mtu) {
                printk("dropped over-mtu packet: %d > %d\n",
                                        packet_length(skb), skb->dev->mtu);
                kfree_skb(skb);
                return -E2BIG;
        }

        dev_queue_xmit(skb);

        return len;

bad_port:
        kfree_skb(skb);
        if (net_ratelimit())
                printk("can't forward to bad port %d\n", out_port);
        return -ENOENT;
}
506
/* Takes ownership of 'skb' and transmits it to 'dp''s control path.  If
 * 'buffer_id' != -1, then only the first 64 bytes of 'skb' are sent;
 * otherwise, all of 'skb' is sent.  'reason' indicates why 'skb' is being
 * sent. 'max_len' sets the maximum number of bytes that the caller
 * wants to be sent; a value of 0 indicates the entire packet should be
 * sent. */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb,
                           uint32_t buffer_id, size_t max_len, int reason)
{
        /* FIXME? packet_rcv_spkt in net/packet/af_packet.c does some stuff
           that we should possibly be doing here too. */
        /* FIXME?  Can we avoid creating a new skbuff in the case where we
         * forward the whole packet? */
        struct sk_buff *f_skb;
        struct nlattr *attr;
        struct ofp_packet_in *opi;
        size_t opi_len;
        size_t len, fwd_len;
        void *data;
        int err = -ENOMEM;

        /* Forward the whole packet, unless it is buffered and the caller
         * asked for a truncated copy. */
        fwd_len = skb->len;
        if ((buffer_id != (uint32_t) -1) && max_len)
                fwd_len = min(fwd_len, max_len);

        /* Room for the OFPT_PACKET_IN attribute plus the dp index attr. */
        len = nla_total_size(offsetof(struct ofp_packet_in, data) + fwd_len)
                                + nla_total_size(sizeof(uint32_t));

        f_skb = genlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
        if (!f_skb)
                goto error_free_skb;

        data = genlmsg_put(f_skb, 0, 0, &dp_genl_family, 0,
                                DP_GENL_C_OPENFLOW);
        if (data == NULL)
                goto error_free_f_skb;

        /* NLA_PUT_U32 jumps to nla_put_failure on overflow. */
        NLA_PUT_U32(f_skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        opi_len = offsetof(struct ofp_packet_in, data) + fwd_len;
        attr = nla_reserve(f_skb, DP_GENL_A_OPENFLOW, opi_len);
        if (!attr)
                goto error_free_f_skb;
        /* Fill the OpenFlow packet-in header in network byte order. */
        opi = nla_data(attr);
        opi->header.version = OFP_VERSION;
        opi->header.type    = OFPT_PACKET_IN;
        opi->header.length  = htons(opi_len);
        opi->header.xid     = htonl(0);

        opi->buffer_id      = htonl(buffer_id);
        opi->total_len      = htons(skb->len);
        opi->in_port        = htons(skb->dev->br_port->port_no);
        opi->reason         = reason;
        opi->pad            = 0;
        /* memcpy from the MAC header requires linear skb data. */
        SKB_LINEAR_ASSERT(skb);
        memcpy(opi->data, skb_mac_header(skb), fwd_len);

        err = genlmsg_end(f_skb, data);
        if (err < 0)
                goto error_free_f_skb;

        err = genlmsg_multicast(f_skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_output_control: genlmsg_multicast failed: %d\n", err);

        /* We own 'skb'; it is consumed on success and failure alike. */
        kfree_skb(skb);

        return err;

nla_put_failure:
error_free_f_skb:
        nlmsg_free(f_skb);
error_free_skb:
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_output_control: failed to send: %d\n", err);
        return err;
}
586
/* Fills OpenFlow physical-port description 'desc' from port 'p', in
 * network byte order.  Link features/speed are queried via ethtool when
 * the kernel and driver support it; otherwise they are reported as 0. */
static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
{
        desc->port_no = htons(p->port_no);
        /* strncpy may leave the name unterminated; the next line forces a
         * terminator in the last byte. */
        strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
        desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
        memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
        desc->flags = htonl(p->flags);
        desc->features = 0;
        desc->speed = 0;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
        if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
                struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };

                if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
                        /* Translate ethtool SUPPORTED_* bits into OFPPF_*. */
                        if (ecmd.supported & SUPPORTED_10baseT_Half)
                                desc->features |= OFPPF_10MB_HD;
                        if (ecmd.supported & SUPPORTED_10baseT_Full)
                                desc->features |= OFPPF_10MB_FD;
                        if (ecmd.supported & SUPPORTED_100baseT_Half)
                                desc->features |= OFPPF_100MB_HD;
                        if (ecmd.supported & SUPPORTED_100baseT_Full)
                                desc->features |= OFPPF_100MB_FD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Half)
                                desc->features |= OFPPF_1GB_HD;
                        if (ecmd.supported & SUPPORTED_1000baseT_Full)
                                desc->features |= OFPPF_1GB_FD;
                        /* 10Gbps half-duplex doesn't exist... */
                        if (ecmd.supported & SUPPORTED_10000baseT_Full)
                                desc->features |= OFPPF_10GB_FD;

                        desc->features = htonl(desc->features);
                        desc->speed = htonl(ecmd.speed);
                }
        }
#endif
}
624
/* Fills the OFPT_DATA_HELLO message 'odh' from datapath 'dp' (table sizes,
 * capabilities, and one ofp_phy_port entry per attached port) and returns
 * the number of ports written.  The caller must have reserved enough space
 * in 'odh' for up to OFPP_MAX port entries; header.length is also left for
 * the caller to set once the port count is known. */
static int
fill_data_hello(struct datapath *dp, struct ofp_data_hello *odh)
{
        struct net_bridge_port *p;
        int port_count = 0;

        odh->header.version = OFP_VERSION;
        odh->header.type    = OFPT_DATA_HELLO;
        odh->header.xid     = htonl(0);
        odh->datapath_id    = cpu_to_be64(dp->id);

        odh->n_exact        = htonl(2 * TABLE_HASH_MAX_FLOWS);
        odh->n_mac_only     = htonl(TABLE_MAC_MAX_FLOWS);
        odh->n_compression  = 0;                                           /* Not supported */
        odh->n_general      = htonl(TABLE_LINEAR_MAX_FLOWS);
        odh->buffer_mb      = htonl(UINT32_MAX);
        odh->n_buffers      = htonl(N_PKT_BUFFERS);
        odh->capabilities   = htonl(OFP_SUPPORTED_CAPABILITIES);
        odh->actions        = htonl(OFP_SUPPORTED_ACTIONS);
        odh->miss_send_len  = htons(dp->miss_send_len);

        /* Append a port descriptor for every attached port. */
        list_for_each_entry_rcu (p, &dp->port_list, node) {
                fill_port_desc(p, &odh->ports[port_count]);
                port_count++;
        }

        return port_count;
}
653
/* Builds and multicasts an OFPT_DATA_HELLO message describing 'dp' to the
 * control path over generic netlink.  Returns 0 or a negative errno. */
int
dp_send_hello(struct datapath *dp)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_data_hello *odh;
        size_t odh_max_len, odh_len, port_max_len, len;
        void *data;
        int err = -ENOMEM;
        int port_count;


        /* Overallocate, since we can't reliably determine the number of
         * ports a priori. */
        port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;

        len = nla_total_size(sizeof(*odh) + port_max_len)
                                + nla_total_size(sizeof(uint32_t));

        skb = genlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_put failed\n");
                goto error;
        }

        /* NLA_PUT_U32 jumps to nla_put_failure on overflow. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        /* Reserve the worst-case attribute, fill it, then shrink it. */
        odh_max_len = sizeof(*odh) + port_max_len;
        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, odh_max_len);
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_hello: nla_reserve failed\n");
                goto error;
        }
        odh = nla_data(attr);
        port_count = fill_data_hello(dp, odh);

        /* Only now that we know how many ports we've added can we say
         * say something about the length. */
        odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
        odh->header.length = htons(odh_len);

        /* Take back the unused part that was reserved */
        nla_unreserve(skb, attr, (odh_max_len - odh_len));

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_hello: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "dp_send_hello: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        /* NOTE(review): relies on kfree_skb() tolerating a NULL 'skb' when
         * genlmsg_new() itself failed. */
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "dp_send_hello: failed to send: %d\n", err);
        return err;
}
728
729 int
730 dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
731 {
732         struct net_bridge_port *p;
733
734         p = dp->ports[htons(opp->port_no)];
735
736         /* Make sure the port id hasn't changed since this was sent */
737         if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN) != 0) 
738                 return -1;
739         
740         p->flags = htonl(opp->flags);
741
742         return 0;
743 }
744
745
/* Builds and multicasts an OFPT_PORT_STATUS message announcing that port
 * 'p' changed state ('status' is an OFPPR_* reason code).  Returns 0 or a
 * negative errno. */
static int
send_port_status(struct net_bridge_port *p, uint8_t status)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_port_status *ops;
        void *data;
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_put failed\n");
                goto error;
        }

        /* NLA_PUT_U32 jumps to nla_put_failure on overflow. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, p->dp->dp_idx);

        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ops));
        if (!attr) {
                if (net_ratelimit())
                        printk("send_port_status: nla_reserve failed\n");
                goto error;
        }

        /* OpenFlow header and payload, network byte order. */
        ops = nla_data(attr);
        ops->header.version = OFP_VERSION;
        ops->header.type    = OFPT_PORT_STATUS;
        ops->header.length  = htons(sizeof(*ops));
        ops->header.xid     = htonl(0);

        ops->reason         = status;
        fill_port_desc(p, &ops->desc);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("send_port_status: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_port_status: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        /* NOTE(review): relies on kfree_skb() tolerating NULL when
         * genlmsg_new() itself failed. */
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "send_port_status: failed to send: %d\n", err);
        return err;
}
809
/* Builds and multicasts an OFPT_FLOW_EXPIRED message for 'flow', which is
 * being evicted from 'dp''s flow table.  Returns 0 or a negative errno. */
int
dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
{
        struct sk_buff *skb;
        struct nlattr *attr;
        struct ofp_flow_expired *ofe;
        void *data;
        unsigned long duration_j;
        int err = -ENOMEM;


        skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
        if (!skb) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_new failed\n");
                goto error;
        }

        data = genlmsg_put(skb, 0, 0, &dp_genl_family, 0,
                           DP_GENL_C_OPENFLOW);
        if (data == NULL) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_put failed\n");
                goto error;
        }

        /* NLA_PUT_U32 jumps to nla_put_failure on overflow. */
        NLA_PUT_U32(skb, DP_GENL_A_DP_IDX, dp->dp_idx);

        attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, sizeof(*ofe));
        if (!attr) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: nla_reserve failed\n");
                goto error;
        }

        /* OpenFlow header and payload, network byte order. */
        ofe = nla_data(attr);
        ofe->header.version = OFP_VERSION;
        ofe->header.type    = OFPT_FLOW_EXPIRED;
        ofe->header.length  = htons(sizeof(*ofe));
        ofe->header.xid     = htonl(0);

        flow_fill_match(&ofe->match, &flow->key);
        /* Flow lifetime in jiffies: presumably flow->timeout was set to
         * last-use + max_idle*HZ, so subtracting max_idle*HZ recovers the
         * last-use time — TODO confirm against flow.c. */
        duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
        ofe->duration   = htonl(duration_j / HZ);
        ofe->packet_count   = cpu_to_be64(flow->packet_count);
        ofe->byte_count     = cpu_to_be64(flow->byte_count);

        err = genlmsg_end(skb, data);
        if (err < 0) {
                if (net_ratelimit())
                        printk("dp_send_flow_expired: genlmsg_end failed\n");
                goto error;
        }

        err = genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC);
        if (err && net_ratelimit())
                printk(KERN_WARNING "send_flow_expired: genlmsg_multicast failed: %d\n", err);

        return err;

nla_put_failure:
error:
        /* NOTE(review): relies on kfree_skb() tolerating NULL when
         * genlmsg_new() itself failed. */
        kfree_skb(skb);
        if (net_ratelimit())
                printk(KERN_ERR "send_flow_expired: failed to send: %d\n", err);
        return err;
}
877
878 /* Generic Netlink interface.
879  *
880  * See netlink(7) for an introduction to netlink.  See
881  * http://linux-net.osdl.org/index.php/Netlink for more information and
882  * pointers on how to work with netlink and Generic Netlink in the kernel and
883  * in userspace. */
884
/* Generic netlink family through which all controller <-> datapath
 * messages travel; the id is assigned by the kernel at registration. */
static struct genl_family dp_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = 0,
        .name = DP_GENL_FAMILY_NAME,
        .version = 1,
        .maxattr = DP_GENL_A_MAX,
};
892
/* Attribute policy: what each attribute may contain.  */
static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },       /* Datapath index. */
        [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },     /* Multicast group id. */
        [DP_GENL_A_PORTNAME] = { .type = NLA_STRING }   /* Net device name. */
};
899
900 static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
901 {
902         if (!info->attrs[DP_GENL_A_DP_IDX])
903                 return -EINVAL;
904
905         return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
906 }
907
/* Registration record binding the ADD_DP command to dp_genl_add. */
static struct genl_ops dp_genl_ops_add_dp = {
        .cmd = DP_GENL_C_ADD_DP,
        .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
        .policy = dp_genl_policy,
        .doit = dp_genl_add,
        .dumpit = NULL,
};
915
916 struct datapath *dp_get(int dp_idx)
917 {
918         if (dp_idx < 0 || dp_idx > DP_MAX)
919                 return NULL;
920         return rcu_dereference(dps[dp_idx]);
921 }
922
923 static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
924 {
925         struct datapath *dp;
926         int err;
927
928         if (!info->attrs[DP_GENL_A_DP_IDX])
929                 return -EINVAL;
930
931         mutex_lock(&dp_mutex);
932         dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
933         if (!dp)
934                 err = -ENOENT;
935         else {
936                 del_dp(dp);
937                 err = 0;
938         }
939         mutex_unlock(&dp_mutex);
940         return err;
941 }
942
/* DP_GENL_C_DEL_DP: destroy an existing datapath. */
static struct genl_ops dp_genl_ops_del_dp = {
	.cmd = DP_GENL_C_DEL_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_del,
	.dumpit = NULL,
};
950
951 /* Queries a datapath for related information.  Currently the only relevant
952  * information is the datapath's multicast group ID.  Really we want one
953  * multicast group per datapath, but because of locking issues[*] we can't
954  * easily get one.  Thus, every datapath will currently return the same
955  * global multicast group ID, but in the future it would be nice to fix that.
956  *
957  * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
958  *       mutex, and genl_register_mc_group, called to acquire a new multicast
959  *       group ID, also acquires genl_lock, thus deadlock.
960  */
961 static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
962 {
963         struct datapath *dp;
964         struct sk_buff *ans_skb = NULL;
965         int dp_idx;
966         int err = -ENOMEM;
967
968         if (!info->attrs[DP_GENL_A_DP_IDX])
969                 return -EINVAL;
970
971         rcu_read_lock();
972         dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX]));
973         dp = dp_get(dp_idx);
974         if (!dp)
975                 err = -ENOENT;
976         else {
977                 void *data;
978                 ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
979                 if (!ans_skb) {
980                         err = -ENOMEM;
981                         goto err;
982                 }
983                 data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
984                                          0, DP_GENL_C_QUERY_DP);
985                 if (data == NULL) {
986                         err = -ENOMEM;
987                         goto err;
988                 }
989                 NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx);
990                 NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id);
991
992                 genlmsg_end(ans_skb, data);
993                 err = genlmsg_reply(ans_skb, info);
994                 if (!err)
995                         ans_skb = NULL;
996         }
997 err:
998 nla_put_failure:
999         if (ans_skb)
1000                 kfree_skb(ans_skb);
1001         rcu_read_unlock();
1002         return err;
1003 }
1004
1005 /*
1006  * Fill flow entry for nl flow query.  Called with rcu_lock  
1007  *
1008  */
/*
 * Fill flow entry for nl flow query.  Called with rcu_lock.
 *
 * Serializes the flow currently pointed to by 'iter' into 'ofm' as an
 * OFPT_FLOW_MOD message.  Always returns 0.
 */
static
int
dp_fill_flow(struct ofp_flow_mod* ofm, struct swt_iterator* iter)
{
	ofm->header.version  = OFP_VERSION;
	ofm->header.type     = OFPT_FLOW_MOD;
	/* Length covers the fixed header plus exactly one action (see the
	 * single-action copy below). */
	ofm->header.length   = htons(sizeof(struct ofp_flow_mod)
				+ sizeof(ofm->actions[0]));
	ofm->header.xid      = htonl(0);

	/* Only 'wildcards' is byte-swapped here; the other match fields are
	 * copied exactly as stored in the flow key — presumably the key
	 * already holds them in network byte order.  TODO confirm. */
	ofm->match.wildcards = htons(iter->flow->key.wildcards);
	ofm->match.in_port   = iter->flow->key.in_port;
	ofm->match.dl_vlan   = iter->flow->key.dl_vlan;
	memcpy(ofm->match.dl_src, iter->flow->key.dl_src, ETH_ALEN);
	memcpy(ofm->match.dl_dst, iter->flow->key.dl_dst, ETH_ALEN);
	ofm->match.dl_type   = iter->flow->key.dl_type;
	ofm->match.nw_src    = iter->flow->key.nw_src;
	ofm->match.nw_dst    = iter->flow->key.nw_dst;
	ofm->match.nw_proto  = iter->flow->key.nw_proto;
	ofm->match.tp_src    = iter->flow->key.tp_src;
	ofm->match.tp_dst    = iter->flow->key.tp_dst;
	ofm->group_id        = iter->flow->group_id;
	ofm->max_idle        = iter->flow->max_idle;
	/* TODO support multiple actions  */
	ofm->actions[0]      = iter->flow->actions[0];

	return 0;
}
1037
1038 static int dp_genl_show(struct sk_buff *skb, struct genl_info *info)
1039 {
1040         struct datapath *dp;
1041         int err = -ENOMEM;
1042         struct sk_buff *ans_skb = NULL;
1043         void *data;
1044         struct nlattr *attr;
1045         struct ofp_data_hello *odh;
1046         size_t odh_max_len, odh_len, port_max_len, len;
1047         int port_count;
1048
1049         if (!info->attrs[DP_GENL_A_DP_IDX])
1050                 return -EINVAL;
1051
1052         mutex_lock(&dp_mutex);
1053         dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
1054         if (!dp)
1055                 goto error;
1056
1057         /* Overallocate, since we can't reliably determine the number of
1058          * ports a priori. */
1059         port_max_len = sizeof(struct ofp_phy_port) * OFPP_MAX;
1060
1061         len = nla_total_size(sizeof(*odh) + port_max_len)
1062                         + nla_total_size(sizeof(uint32_t));
1063
1064         ans_skb = nlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_KERNEL);
1065         if (!ans_skb)
1066                 goto error;
1067
1068         data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
1069                                  0, DP_GENL_C_SHOW_DP);
1070         if (data == NULL) 
1071                 goto error;
1072
1073         NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
1074
1075         odh_max_len = sizeof(*odh) + port_max_len;
1076         attr = nla_reserve(ans_skb, DP_GENL_A_DP_INFO, odh_max_len);
1077         if (!attr)
1078                 goto error;
1079         odh = nla_data(attr);
1080         port_count = fill_data_hello(dp, odh);
1081
1082         /* Only now that we know how many ports we've added can we say
1083          * say something about the length. */
1084         odh_len = sizeof(*odh) + (sizeof(struct ofp_phy_port) * port_count);
1085         odh->header.length = htons(odh_len);
1086
1087         /* Take back the unused part that was reserved */
1088         nla_unreserve(ans_skb, attr, (odh_max_len - odh_len));
1089
1090         genlmsg_end(ans_skb, data);
1091         err = genlmsg_reply(ans_skb, info);
1092         if (!err)
1093                 ans_skb = NULL;
1094
1095 error:
1096 nla_put_failure:
1097         if (ans_skb)
1098                 kfree_skb(ans_skb);
1099         mutex_unlock(&dp_mutex);
1100         return err;
1101 }
1102
/* DP_GENL_C_SHOW_DP: dump a datapath's configuration and port list. */
static struct genl_ops dp_genl_ops_show_dp = {
	.cmd = DP_GENL_C_SHOW_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_show,
	.dumpit = NULL,
};
1110
/* Convenience function: starts a DP_GENL_C_QUERY_FLOW reply on 'skb' and
 * adds the datapath-index and table-index attributes.  Returns the
 * genetlink user header pointer on success, or NULL on failure (the caller
 * retains ownership of 'skb' and must free it). */
static
void*
dp_init_nl_flow_msg(uint32_t dp_idx, uint16_t table_idx,
		struct genl_info *info, struct sk_buff* skb)
{
	void* data;

	data = genlmsg_put_reply(skb, info, &dp_genl_family, 0,
				DP_GENL_C_QUERY_FLOW);
	if (data == NULL)
		return NULL;
	NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,   dp_idx);
	NLA_PUT_U16(skb, DP_GENL_A_TABLEIDX, table_idx);

	return data;

nla_put_failure:	/* jumped to by the NLA_PUT_* macros on overflow */
	return NULL;
}
1131
1132 /*  Iterate through the specified table and send all flow entries over
1133  *  netlink to userspace.  Each flow message has the following format:
1134  *
1135  *  32bit dpix
1136  *  16bit tabletype
1137  *  32bit number of flows
1138  *  openflow-flow-entries
1139  *
 *  The full table may require multiple messages.  A message carrying 0
 *  flows is the end-of-dump sentinel.
 */
1142  */
1143
/* Dumps, via unicast Generic Netlink messages to the requester, every flow
 * in table 'table_idx' of 'dp' that matches 'matchme'.  Flows are batched
 * NL_FLOWS_PER_MESSAGE per message; a final message with a flow count of
 * zero marks the end of the dump.  Returns 0 (or the last send status) on
 * success, a negative errno value on failure. */
static
int
dp_dump_table(struct datapath *dp, uint16_t table_idx, struct genl_info *info, struct ofp_flow_mod* matchme)
{
	struct sk_buff  *skb = 0;
	struct sw_table *table = 0;
	struct swt_iterator iter;
	struct sw_flow_key in_flow;
	struct nlattr   *attr;
	int count = 0, sum_count = 0;
	void *data;
	uint8_t* ofm_ptr = 0;
	struct nlattr   *num_attr;
	int err = -ENOMEM;

	table = dp->chain->tables[table_idx];
	if ( table == NULL ) {
		dprintk("dp::dp_dump_table error, non-existant table at position %d\n", table_idx);
		return -EINVAL;
	}

	if (!table->iterator(table, &iter)) {
		dprintk("dp::dp_dump_table couldn't initialize empty table iterator\n");
		return -ENOMEM;
	}

	while (iter.flow) {

		/* verify that we can fit all NL_FLOWS_PER_MESSAGE in a single
		 * sk_buf.  NOTE(review): this check involves only compile-time
		 * constants, so it is loop-invariant and could be hoisted (or
		 * made a build-time assertion). */
		if( (sizeof(dp_genl_family) + sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) +
					(NL_FLOWS_PER_MESSAGE * sizeof(struct ofp_flow_mod))) > (8192 - 64)){
			dprintk("dp::dp_dump_table NL_FLOWS_PER_MESSAGE may cause overrun in skbuf\n");
			return -ENOMEM;
		}

		skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
		if (skb == NULL) {
			return -ENOMEM;
		}

		data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
		if (data == NULL){
			err= -ENOMEM;
			goto error_free_skb;
		}

		/* reserve space to put the number of flows for this message, to
		 * be filled after the loop*/
		num_attr = nla_reserve(skb, DP_GENL_A_NUMFLOWS, sizeof(uint32_t));
		if(!num_attr){
			err = -ENOMEM;
			goto error_free_skb;
		}

		/* Only load NL_FLOWS_PER_MESSAGE flows at a time */
		attr = nla_reserve(skb, DP_GENL_A_FLOW,
				(sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action)) * NL_FLOWS_PER_MESSAGE);
		if (!attr){
			err = -ENOMEM;
			goto error_free_skb;
		}

		/* internal loop to fill NL_FLOWS_PER_MESSAGE flows */
		ofm_ptr = nla_data(attr);
		flow_extract_match(&in_flow, &matchme->match);
		while (iter.flow && count < NL_FLOWS_PER_MESSAGE) {
			if(flow_matches(&in_flow, &iter.flow->key)){
				if((err = dp_fill_flow((struct ofp_flow_mod*)ofm_ptr, &iter)))
					goto error_free_skb;
				count++;
				/* TODO support multiple actions  */
				ofm_ptr += sizeof(struct ofp_flow_mod) + sizeof(struct ofp_action);
			}
			table->iterator_next(&iter);
		}

		/* Backfill the flow count reserved above. */
		*((uint32_t*)nla_data(num_attr)) = count;
		genlmsg_end(skb, data);

		sum_count += count;
		count = 0;

		/* NOTE(review): genlmsg_unicast consumes the skb, hence
		 * skb = 0 below; its return value is kept in 'err' but a
		 * failed send does not abort the dump — confirm intended. */
		err = genlmsg_unicast(skb, info->snd_pid);
		skb = 0;
	}

	/* send a sentinel message saying we're done */
	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
	if (skb == NULL) {
		return -ENOMEM;
	}
	data = dp_init_nl_flow_msg(dp->dp_idx, table_idx, info, skb);
	if (data == NULL){
		err= -ENOMEM;
		goto error_free_skb;
	}

	NLA_PUT_U32(skb, DP_GENL_A_NUMFLOWS,   0);
	/* dummy flow so nl doesn't complain */
	attr = nla_reserve(skb, DP_GENL_A_FLOW, sizeof(struct ofp_flow_mod));
	if (!attr){
		err = -ENOMEM;
		goto error_free_skb;
	}
	genlmsg_end(skb, data);
	err = genlmsg_reply(skb, info); skb = 0;

nla_put_failure:
error_free_skb:
	if(skb)
		kfree_skb(skb);
	return err;
}
1258
1259 /* Helper function to query_table which creates and sends a message packed with
1260  * table stats.  Message form is:
1261  *
1262  * u32 DP_IDX
1263  * u32 NUM_TABLES
1264  * OFP_TABLE (list of OFP_TABLES)
1265  *
1266  */
1267
1268 static 
1269 int 
1270 dp_dump_table_stats(struct datapath *dp, int dp_idx, struct genl_info *info) 
1271
1272         struct sk_buff   *skb = 0; 
1273         struct ofp_table *ot = 0;
1274         struct nlattr   *attr;
1275         struct sw_table_stats stats; 
1276         size_t len;
1277         void *data; 
1278         int err = -ENOMEM;
1279         int i = 0;
1280         int nt = dp->chain->n_tables;
1281
1282         len = 4 + 4 + (sizeof(struct ofp_table) * nt);
1283
1284         /* u32 IDX, u32 NUMTABLES, list-of-tables */
1285         skb = nlmsg_new(MAX(len, NLMSG_GOODSIZE), GFP_ATOMIC);
1286         if (skb == NULL) {
1287                 return -ENOMEM;
1288         }
1289         
1290         data = genlmsg_put_reply(skb, info, &dp_genl_family, 0, 
1291                                 DP_GENL_C_QUERY_TABLE);
1292         if (data == NULL){
1293                 return -ENOMEM;
1294         } 
1295
1296         NLA_PUT_U32(skb, DP_GENL_A_DP_IDX,      dp_idx);
1297         NLA_PUT_U32(skb, DP_GENL_A_NUMTABLES, nt);
1298
1299         /* ... we assume that all tables can fit in a single message.
1300          * Probably a reasonable assumption seeing that we only have
1301          * 3 atm */
1302         attr = nla_reserve(skb, DP_GENL_A_TABLE, (sizeof(struct ofp_table) * nt));
1303         if (!attr){
1304                 err = -ENOMEM;
1305                 goto error_free_skb;
1306         }
1307
1308         ot = nla_data(attr);
1309
1310         for (i = 0; i < nt; ++i) {
1311                 dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
1312                 ot->header.version = OFP_VERSION;
1313                 ot->header.type    = OFPT_TABLE;
1314                 ot->header.length  = htons(sizeof(struct ofp_table));
1315                 ot->header.xid     = htonl(0);
1316
1317                 strncpy(ot->name, stats.name, OFP_MAX_TABLE_NAME_LEN); 
1318                 ot->table_id  = htons(i);
1319                 ot->n_flows   = htonl(stats.n_flows);
1320                 ot->max_flows = htonl(stats.max_flows);
1321                 ot++;
1322         }
1323
1324         genlmsg_end(skb, data); 
1325         err = genlmsg_reply(skb, info); skb = 0;
1326
1327 nla_put_failure:
1328 error_free_skb:
1329         if(skb)
1330                 kfree_skb(skb);
1331         return err;
1332 }
1333
1334 /* 
1335  * Queries a datapath for flow-table statistics 
1336  */
1337
1338
1339 static int dp_genl_table_query(struct sk_buff *skb, struct genl_info *info)
1340 {
1341         struct   datapath* dp;
1342         int       err = 0;
1343
1344         if (!info->attrs[DP_GENL_A_DP_IDX]) {
1345                 dprintk("dp::dp_genl_table_query received message with missing attributes\n");
1346                 return -EINVAL;
1347         }
1348
1349         rcu_read_lock();
1350         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1351         if (!dp) {
1352                 err = -ENOENT;
1353                 goto err_out;
1354         }
1355
1356         err = dp_dump_table_stats(dp, nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]), info); 
1357
1358 err_out:
1359         rcu_read_unlock();
1360         return err;
1361 }
1362
1363 /* 
1364  * Queries a datapath for flow-table entries.
1365  */
1366
1367 static int dp_genl_flow_query(struct sk_buff *skb, struct genl_info *info)
1368 {
1369         struct datapath* dp;
1370         struct ofp_flow_mod*  ofm;
1371         u16     table_idx;
1372         int     err = 0;
1373
1374         if (!info->attrs[DP_GENL_A_DP_IDX]
1375                                 || !info->attrs[DP_GENL_A_TABLEIDX]
1376                                 || !info->attrs[DP_GENL_A_FLOW]) {
1377                 dprintk("dp::dp_genl_flow_query received message with missing attributes\n");
1378                 return -EINVAL;
1379         }
1380
1381         rcu_read_lock();
1382         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1383         if (!dp) {
1384                 err = -ENOENT;
1385                 goto err_out;
1386         }
1387
1388         table_idx = nla_get_u16(info->attrs[DP_GENL_A_TABLEIDX]);
1389
1390         if (dp->chain->n_tables <= table_idx){
1391                 printk("table index %d invalid (dp has %d tables)\n",
1392                                 table_idx, dp->chain->n_tables);
1393         err = -EINVAL;
1394                 goto err_out;
1395         }
1396
1397         ofm = nla_data(info->attrs[DP_GENL_A_FLOW]);
1398         err = dp_dump_table(dp, table_idx, info, ofm); 
1399
1400 err_out:
1401         rcu_read_unlock();
1402         return err;
1403 }
1404
/* Attribute policy for flow queries.  DP_GENL_A_FLOW carries a raw
 * ofp_flow_mod and has no entry here — presumably left unvalidated on
 * purpose as an opaque binary attribute; confirm. */
static struct nla_policy dp_genl_flow_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
	[DP_GENL_A_TABLEIDX] = { .type = NLA_U16 },
	[DP_GENL_A_NUMFLOWS]  = { .type = NLA_U32 },
};

/* DP_GENL_C_QUERY_FLOW: dump matching flow entries from one table. */
static struct genl_ops dp_genl_ops_query_flow = {
	.cmd    = DP_GENL_C_QUERY_FLOW,
	.flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_flow_policy,
	.doit   = dp_genl_flow_query,
	.dumpit = NULL,
};
1418
/* Attribute policy for table-statistics queries. */
static struct nla_policy dp_genl_table_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX]      = { .type = NLA_U32 },
};

/* DP_GENL_C_QUERY_TABLE: dump per-table statistics. */
static struct genl_ops dp_genl_ops_query_table = {
	.cmd    = DP_GENL_C_QUERY_TABLE,
	.flags  = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_table_policy,
	.doit   = dp_genl_table_query,
	.dumpit = NULL,
};
1430
1431
/* DP_GENL_C_QUERY_DP: report a datapath's multicast group ID. */
static struct genl_ops dp_genl_ops_query_dp = {
	.cmd = DP_GENL_C_QUERY_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_query,
	.dumpit = NULL,
};
1439
1440 static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
1441 {
1442         struct datapath *dp;
1443         struct net_device *port;
1444         int err;
1445
1446         if (!info->attrs[DP_GENL_A_DP_IDX] || !info->attrs[DP_GENL_A_PORTNAME])
1447                 return -EINVAL;
1448
1449         /* Get datapath. */
1450         mutex_lock(&dp_mutex);
1451         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1452         if (!dp) {
1453                 err = -ENOENT;
1454                 goto out;
1455         }
1456
1457         /* Get interface to add/remove. */
1458         port = dev_get_by_name(&init_net, 
1459                         nla_data(info->attrs[DP_GENL_A_PORTNAME]));
1460         if (!port) {
1461                 err = -ENOENT;
1462                 goto out;
1463         }
1464
1465         /* Execute operation. */
1466         if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
1467                 err = add_switch_port(dp, port);
1468         else {
1469                 if (port->br_port == NULL || port->br_port->dp != dp) {
1470                         err = -ENOENT;
1471                         goto out_put;
1472                 }
1473                 err = del_switch_port(port->br_port);
1474         }
1475
1476 out_put:
1477         dev_put(port);
1478 out:
1479         mutex_unlock(&dp_mutex);
1480         return err;
1481 }
1482
/* DP_GENL_C_ADD_PORT: attach a network device to a datapath. */
static struct genl_ops dp_genl_ops_add_port = {
	.cmd = DP_GENL_C_ADD_PORT,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_add_del_port,
	.dumpit = NULL,
};

/* DP_GENL_C_DEL_PORT: detach a network device from a datapath (same
 * handler as add; it dispatches on genlhdr->cmd). */
static struct genl_ops dp_genl_ops_del_port = {
	.cmd = DP_GENL_C_DEL_PORT,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_add_del_port,
	.dumpit = NULL,
};
1498
1499 static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
1500 {
1501         struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
1502         struct datapath *dp;
1503         int err;
1504
1505         if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
1506                 return -EINVAL;
1507
1508         rcu_read_lock();
1509         dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
1510         if (!dp) {
1511                 err = -ENOENT;
1512                 goto out;
1513         }
1514
1515         va = info->attrs[DP_GENL_A_OPENFLOW];
1516
1517         err = fwd_control_input(dp->chain, nla_data(va), nla_len(va));
1518
1519 out:
1520         rcu_read_unlock();
1521         return err;
1522 }
1523
/* Attribute policy for OpenFlow control messages.  DP_GENL_A_OPENFLOW
 * itself has no entry — presumably left unvalidated on purpose as an
 * opaque binary attribute; confirm. */
static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
};

/* DP_GENL_C_OPENFLOW: deliver a raw OpenFlow message to the datapath. */
static struct genl_ops dp_genl_ops_openflow = {
	.cmd = DP_GENL_C_OPENFLOW,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_openflow_policy,
	.doit = dp_genl_openflow,
	.dumpit = NULL,
};
1535
/* Attribute policy for netlink benchmarking requests. */
static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
	[DP_GENL_A_NPACKETS] = { .type = NLA_U32 }, /* packets to send */
	[DP_GENL_A_PSIZE] = { .type = NLA_U32 },    /* size of each packet */
};

/* DP_GENL_C_BENCHMARK_NL: measure netlink throughput (handler defined
 * elsewhere in this file). */
static struct genl_ops dp_genl_ops_benchmark_nl = {
	.cmd = DP_GENL_C_BENCHMARK_NL,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_benchmark_policy,
	.doit = dp_genl_benchmark_nl,
	.dumpit = NULL,
};
1549
/* All Generic Netlink operations this module registers, in registration
 * (and therefore dispatch-search) order. */
static struct genl_ops *dp_genl_all_ops[] = {
	/* Keep this operation first.  Generic Netlink dispatching
	 * looks up operations with linear search, so we want it at the
	 * front. */
	&dp_genl_ops_openflow,

	&dp_genl_ops_query_flow,
	&dp_genl_ops_query_table,
	&dp_genl_ops_show_dp,
	&dp_genl_ops_add_dp,
	&dp_genl_ops_del_dp,
	&dp_genl_ops_query_dp,
	&dp_genl_ops_add_port,
	&dp_genl_ops_del_port,
	&dp_genl_ops_benchmark_nl,
};
1566
1567 static int dp_init_netlink(void)
1568 {
1569         int err;
1570         int i;
1571
1572         err = genl_register_family(&dp_genl_family);
1573         if (err)
1574                 return err;
1575
1576         for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
1577                 err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
1578                 if (err)
1579                         goto err_unregister;
1580         }
1581
1582         strcpy(mc_group.name, "openflow");
1583         err = genl_register_mc_group(&dp_genl_family, &mc_group);
1584         if (err < 0)
1585                 goto err_unregister;
1586
1587         return 0;
1588
1589 err_unregister:
1590         genl_unregister_family(&dp_genl_family);
1591                 return err;
1592 }
1593
/* Unregisters the Generic Netlink family (which also drops its ops and
 * multicast group). */
static void dp_uninit_netlink(void)
{
	genl_unregister_family(&dp_genl_family);
}
1598
1599 #define DRV_NAME                "openflow"
1600 #define DRV_VERSION      VERSION
1601 #define DRV_DESCRIPTION "OpenFlow switching datapath implementation"
1602 #define DRV_COPYRIGHT   "Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"
1603
1604
1605 static int __init dp_init(void)
1606 {
1607         int err;
1608
1609         printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
1610         printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n");
1611         printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n");
1612
1613         err = flow_init();
1614         if (err)
1615                 goto error;
1616
1617         err = dp_init_netlink();
1618         if (err)
1619                 goto error_flow_exit;
1620
1621         /* Hook into callback used by the bridge to intercept packets.
1622          * Parasites we are. */
1623         if (br_handle_frame_hook)
1624                 printk("openflow: hijacking bridge hook\n");
1625         br_handle_frame_hook = dp_frame_hook;
1626
1627         return 0;
1628
1629 error_flow_exit:
1630         flow_exit();
1631 error:
1632         printk(KERN_EMERG "openflow: failed to install!");
1633         return err;
1634 }
1635
/* Module teardown: shuts down the forwarding layer, unregisters the
 * Generic Netlink interface, tears down the flow subsystem, and releases
 * the bridge frame hook. */
static void dp_cleanup(void)
{
	/* NOTE(review): the bridge hook is cleared only after the other
	 * subsystems are torn down, so dp_frame_hook could still run
	 * during teardown — confirm this ordering is safe. */
	fwd_exit();
	dp_uninit_netlink();
	flow_exit();
	br_handle_frame_hook = NULL;
}
1643
/* Module entry/exit points and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");