Merge citrix branch into master.
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #include <linux/kernel.h>
10 #include <asm/uaccess.h>
11 #include <linux/completion.h>
12 #include <linux/delay.h>
13 #include <linux/etherdevice.h>
14 #include <linux/if_bridge.h>
15 #include <linux/rculist.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
19
20 #include "compat.h"
21 #include "openvswitch/brcompat-netlink.h"
22 #include "brc_procfs.h"
23 #include "brc_sysfs.h"
24 #include "datapath.h"
25 #include "dp_dev.h"
26
27 static struct genl_family brc_genl_family;
28 static struct genl_multicast_group brc_mc_group;
29
30 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
31  * jiffies. */
32 #define BRC_TIMEOUT (HZ * 5)
33
34 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
35  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
36  * ourselves and we don't want to hold the latter over a potentially long
37  * period of time.) */
38 static DEFINE_MUTEX(brc_serial);
39
40 /* Userspace communication. */
41 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
42 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
43 static struct sk_buff *brc_reply;    /* Reply from userspace. */
44 static u32 brc_seq;                  /* Sequence number for current op. */
45
46 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
47 static int brc_send_simple_command(struct sk_buff *);
48
49 static int
50 get_dp_ifindices(int *indices, int num)
51 {
52         int i, index = 0;
53
54         rcu_read_lock();
55         for (i=0; i < ODP_MAX && index < num; i++) {
56                 struct datapath *dp = get_dp(i);
57                 if (!dp)
58                         continue;
59                 indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
60         }
61         rcu_read_unlock();
62
63         return index;
64 }
65
66 static void
67 get_port_ifindices(struct datapath *dp, int *ifindices, int num)
68 {
69         struct net_bridge_port *p;
70
71         rcu_read_lock();
72         list_for_each_entry_rcu (p, &dp->port_list, node) {
73                 if (p->port_no < num)
74                         ifindices[p->port_no] = p->dev->ifindex;
75         }
76         rcu_read_unlock();
77 }
78
79 static struct sk_buff *
80 brc_make_request(int op, const char *bridge, const char *port)
81 {
82         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
83         if (!skb)
84                 goto error;
85
86         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
87         NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
88         if (port)
89                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
90         return skb;
91
92 nla_put_failure:
93         kfree_skb(skb);
94 error:
95         return NULL;
96 }
97
98 static int brc_send_simple_command(struct sk_buff *request)
99 {
100         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
101         struct sk_buff *reply;
102         int error;
103
104         reply = brc_send_command(request, attrs);
105         if (IS_ERR(reply))
106                 return PTR_ERR(reply);
107
108         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
109         kfree_skb(reply);
110         return -error;
111 }
112
113 static int brc_add_del_bridge(char __user *uname, int add)
114 {
115         struct sk_buff *request;
116         char name[IFNAMSIZ];
117
118         if (copy_from_user(name, uname, IFNAMSIZ))
119                 return -EFAULT;
120
121         name[IFNAMSIZ - 1] = 0;
122         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
123                                    name, NULL);
124         if (!request)
125                 return -ENOMEM;
126
127         return brc_send_simple_command(request);
128 }
129
130 static int brc_get_bridges(int __user *uindices, int n)
131 {
132         int *indices;
133         int ret;
134
135         if (n >= 2048)
136                 return -ENOMEM;
137
138         indices = kcalloc(n, sizeof(int), GFP_KERNEL);
139         if (indices == NULL)
140                 return -ENOMEM;
141
142         n = get_dp_ifindices(indices, n);
143
144         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
145
146         kfree(indices);
147         return ret;
148 }
149
150 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
151 static int
152 old_deviceless(void __user *uarg)
153 {
154         unsigned long args[3];
155
156         if (copy_from_user(args, uarg, sizeof(args)))
157                 return -EFAULT;
158
159         switch (args[0]) {
160         case BRCTL_GET_BRIDGES:
161                 return brc_get_bridges((int __user *)args[1], args[2]);
162
163         case BRCTL_ADD_BRIDGE:
164                 return brc_add_del_bridge((void __user *)args[1], 1);
165         case BRCTL_DEL_BRIDGE:
166                 return brc_add_del_bridge((void __user *)args[1], 0);
167         }
168
169         return -EOPNOTSUPP;
170 }
171
172 /* Called with the br_ioctl_mutex. */
173 static int
174 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
175 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
176 #else
177 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
178 #endif
179 {
180         switch (cmd) {
181         case SIOCGIFBR:
182         case SIOCSIFBR:
183                 return old_deviceless(uarg);
184
185         case SIOCBRADDBR:
186                 return brc_add_del_bridge(uarg, 1);
187         case SIOCBRDELBR:
188                 return brc_add_del_bridge(uarg, 0);
189         }
190
191         return -EOPNOTSUPP;
192 }
193
194 static int
195 brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
196 {
197         struct sk_buff *request;
198         struct net_device *port;
199         int err;
200
201         port = __dev_get_by_index(&init_net, port_ifindex);
202         if (!port)
203                 return -EINVAL;
204
205         /* Save name of dev and port because there's a race between the
206          * rtnl_unlock() and the brc_send_simple_command(). */
207         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
208                                    dev->name, port->name);
209         if (!request)
210                 return -ENOMEM;
211
212         rtnl_unlock();
213         err = brc_send_simple_command(request);
214         rtnl_lock();
215
216         return err;
217 }
218
219 static int
220 brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
221 {
222         struct __bridge_info b;
223         u64 id = 0;
224         int i;
225
226         memset(&b, 0, sizeof(struct __bridge_info));
227
228         for (i=0; i<ETH_ALEN; i++)
229                 id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
230         b.bridge_id = cpu_to_be64(id);
231         b.stp_enabled = 0;
232
233         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
234                 return -EFAULT;
235
236         return 0;
237 }
238
239 static int
240 brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
241 {
242         struct dp_dev *dp_dev = netdev_priv(dev);
243         struct datapath *dp = dp_dev->dp;
244         int *indices;
245
246         if (num < 0)
247                 return -EINVAL;
248         if (num == 0)
249                 num = 256;
250         if (num > DP_MAX_PORTS)
251                 num = DP_MAX_PORTS;
252
253         indices = kcalloc(num, sizeof(int), GFP_KERNEL);
254         if (indices == NULL)
255                 return -ENOMEM;
256
257         get_port_ifindices(dp, indices, num);
258         if (copy_to_user(uindices, indices, num * sizeof(int)))
259                 num = -EFAULT;
260         kfree(indices);
261         return num;
262 }
263
264 /*
265  * Format up to a page worth of forwarding table entries
266  * userbuf -- where to copy result
267  * maxnum  -- maximum number of entries desired
268  *            (limited to a page for sanity)
269  * offset  -- number of records to skip
270  */
271 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf, 
272                                unsigned long maxnum, unsigned long offset)
273 {
274         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
275         struct sk_buff *request, *reply;
276         int retval;
277         int len;
278
279         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
280         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
281                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
282
283         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
284         if (!request)
285                 return -ENOMEM;
286         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
287         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
288
289         rtnl_unlock();
290         reply = brc_send_command(request, attrs);
291         retval = PTR_ERR(reply);
292         if (IS_ERR(reply))
293                 goto exit;
294
295         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
296         if (retval < 0)
297                 goto exit_free_skb;
298
299         retval = -EINVAL;
300         if (!attrs[BRC_GENL_A_FDB_DATA])
301                 goto exit_free_skb;
302         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
303         if (len % sizeof(struct __fdb_entry) ||
304             len / sizeof(struct __fdb_entry) > maxnum)
305                 goto exit_free_skb;
306
307         retval = len / sizeof(struct __fdb_entry);
308         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
309                 retval = -EFAULT;
310
311 exit_free_skb:
312         kfree_skb(reply);
313 exit:
314         rtnl_lock();
315         return retval;
316
317 nla_put_failure:
318         kfree_skb(request);
319         return -ENOMEM;
320 }
321
322 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
323 static int
324 old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
325 {
326         unsigned long args[4];
327
328         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
329                 return -EFAULT;
330
331         switch (args[0]) {
332         case BRCTL_ADD_IF:
333                 return brc_add_del_port(dev, args[1], 1);
334         case BRCTL_DEL_IF:
335                 return brc_add_del_port(dev, args[1], 0);
336
337         case BRCTL_GET_BRIDGE_INFO:
338                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
339
340         case BRCTL_GET_PORT_LIST:
341                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
342
343         case BRCTL_GET_FDB_ENTRIES:
344                 return brc_get_fdb_entries(dev, (void __user *)args[1],
345                                            args[2], args[3]);
346         }
347
348         return -EOPNOTSUPP;
349 }
350
351 /* Called with the rtnl_lock. */
352 static int
353 brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
354 {
355         int err;
356
357         switch (cmd) {
358                 case SIOCDEVPRIVATE:
359                         err = old_dev_ioctl(dev, rq, cmd);
360                         break;
361
362                 case SIOCBRADDIF:
363                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
364                 case SIOCBRDELIF:
365                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
366
367                 default:
368                         err = -EOPNOTSUPP;
369                         break;
370         }
371
372         return err;
373 }
374
375
376 static struct genl_family brc_genl_family = {
377         .id = GENL_ID_GENERATE,
378         .hdrsize = 0,
379         .name = BRC_GENL_FAMILY_NAME,
380         .version = 1,
381         .maxattr = BRC_GENL_A_MAX,
382 };
383
384 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
385 {
386         int err = -EINVAL;
387         struct sk_buff *ans_skb;
388         void *data;
389
390         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
391         if (!ans_skb) 
392                 return -ENOMEM;
393
394         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
395                                  0, BRC_GENL_C_QUERY_MC);
396         if (data == NULL) {
397                 err = -ENOMEM;
398                 goto err;
399         }
400         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
401
402         genlmsg_end(ans_skb, data);
403         return genlmsg_reply(ans_skb, info);
404
405 err:
406 nla_put_failure:
407         kfree_skb(ans_skb);
408         return err;
409 }
410
411 static struct genl_ops brc_genl_ops_query_dp = {
412         .cmd = BRC_GENL_C_QUERY_MC,
413         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
414         .policy = NULL,
415         .doit = brc_genl_query,
416         .dumpit = NULL
417 };
418
419 /* Attribute policy: what each attribute may contain.  */
420 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
421         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
422
423         [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
424         [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
425         [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
426
427         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
428 };
429
430 static int
431 brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
432 {
433         unsigned long int flags;
434         int err;
435
436         if (!info->attrs[BRC_GENL_A_ERR_CODE])
437                 return -EINVAL;
438
439         skb = skb_clone(skb, GFP_KERNEL);
440         if (!skb)
441                 return -ENOMEM;
442
443         spin_lock_irqsave(&brc_lock, flags);
444         if (brc_seq == info->snd_seq) {
445                 brc_seq++;
446
447                 if (brc_reply)
448                         kfree_skb(brc_reply);
449                 brc_reply = skb;
450
451                 complete(&brc_done);
452                 err = 0;
453         } else {
454                 kfree_skb(skb);
455                 err = -ESTALE;
456         }
457         spin_unlock_irqrestore(&brc_lock, flags);
458
459         return err;
460 }
461
462 static struct genl_ops brc_genl_ops_dp_result = {
463         .cmd = BRC_GENL_C_DP_RESULT,
464         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
465         .policy = brc_genl_policy,
466         .doit = brc_genl_dp_result,
467         .dumpit = NULL
468 };
469
470 static struct genl_ops brc_genl_ops_set_proc = {
471         .cmd = BRC_GENL_C_SET_PROC,
472         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
473         .policy = brc_genl_policy,
474         .doit = brc_genl_set_proc,
475         .dumpit = NULL
476 };
477
478 static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr **attrs)
479 {
480         unsigned long int flags;
481         struct sk_buff *reply;
482         int error;
483
484         mutex_lock(&brc_serial);
485
486         /* Increment sequence number first, so that we ignore any replies
487          * to stale requests. */
488         spin_lock_irqsave(&brc_lock, flags);
489         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
490         INIT_COMPLETION(brc_done);
491         spin_unlock_irqrestore(&brc_lock, flags);
492
493         nlmsg_end(request, nlmsg_hdr(request));
494
495         /* Send message. */
496         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
497         if (error < 0)
498                 goto error;
499
500         /* Wait for reply. */
501         error = -ETIMEDOUT;
502         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
503                 goto error;
504
505         /* Grab reply. */
506         spin_lock_irqsave(&brc_lock, flags);
507         reply = brc_reply;
508         brc_reply = NULL;
509         spin_unlock_irqrestore(&brc_lock, flags);
510
511         mutex_unlock(&brc_serial);
512
513         /* Re-parse message.  Can't fail, since it parsed correctly once
514          * already. */
515         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
516                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
517         WARN_ON(error);
518
519         return reply;
520
521 error:
522         mutex_unlock(&brc_serial);
523         return ERR_PTR(error);
524 }
525
526 int brc_add_dp(struct datapath *dp)
527 {
528         if (!try_module_get(THIS_MODULE))
529                 return -ENODEV;
530         brc_sysfs_add_dp(dp);
531
532         return 0;
533 }
534
535 int brc_del_dp(struct datapath *dp) 
536 {
537         brc_sysfs_del_dp(dp);
538         module_put(THIS_MODULE);
539
540         return 0;
541 }
542
543 static int 
544 __init brc_init(void)
545 {
546         int i;
547         int err;
548
549         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
550
551         rcu_read_lock();
552         for (i=0; i<ODP_MAX; i++) {
553                 if (get_dp(i)) {
554                         rcu_read_unlock();
555                         printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
556                         return -EEXIST;
557                 }
558         }
559         rcu_read_unlock();
560
561         /* Set the bridge ioctl handler */
562         brioctl_set(brc_ioctl_deviceless_stub);
563
564         /* Set the openvswitch_mod device ioctl handler */
565         dp_ioctl_hook = brc_dev_ioctl;
566
567         /* Register hooks for datapath adds and deletes */
568         dp_add_dp_hook = brc_add_dp;
569         dp_del_dp_hook = brc_del_dp;
570
571         /* Register hooks for interface adds and deletes */
572         dp_add_if_hook = brc_sysfs_add_if;
573         dp_del_if_hook = brc_sysfs_del_if;
574
575         /* Randomize the initial sequence number.  This is not a security
576          * feature; it only helps avoid crossed wires between userspace and
577          * the kernel when the module is unloaded and reloaded. */
578         brc_seq = net_random();
579
580         /* Register generic netlink family to communicate changes to
581          * userspace. */
582         err = genl_register_family(&brc_genl_family);
583         if (err)
584                 goto error;
585
586         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
587         if (err != 0) 
588                 goto err_unregister;
589
590         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
591         if (err != 0) 
592                 goto err_unregister;
593
594         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
595         if (err != 0) 
596                 goto err_unregister;
597
598         strcpy(brc_mc_group.name, "brcompat");
599         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
600         if (err < 0)
601                 goto err_unregister;
602
603         return 0;
604
605 err_unregister:
606         genl_unregister_family(&brc_genl_family);
607 error:
608         printk(KERN_EMERG "brcompat: failed to install!");
609         return err;
610 }
611
612 static void 
613 brc_cleanup(void)
614 {
615         /* Unregister hooks for datapath adds and deletes */
616         dp_add_dp_hook = NULL;
617         dp_del_dp_hook = NULL;
618         
619         /* Unregister hooks for interface adds and deletes */
620         dp_add_if_hook = NULL;
621         dp_del_if_hook = NULL;
622
623         /* Unregister ioctl hooks */
624         dp_ioctl_hook = NULL;
625         brioctl_set(NULL);
626
627         genl_unregister_family(&brc_genl_family);
628         brc_procfs_exit();
629 }
630
631 module_init(brc_init);
632 module_exit(brc_cleanup);
633
634 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
635 MODULE_AUTHOR("Nicira Networks");
636 MODULE_LICENSE("GPL");