datapath: Rename brc_sysfs_* to dp_sysfs_*.
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #include <linux/kernel.h>
10 #include <asm/uaccess.h>
11 #include <linux/completion.h>
12 #include <linux/delay.h>
13 #include <linux/etherdevice.h>
14 #include <linux/if_bridge.h>
15 #include <linux/rculist.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
19
20 #include "compat.h"
21 #include "openvswitch/brcompat-netlink.h"
22 #include "brc_procfs.h"
23 #include "datapath.h"
24 #include "dp_dev.h"
25
26 static struct genl_family brc_genl_family;
27 static struct genl_multicast_group brc_mc_group;
28
29 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
30  * jiffies. */
31 #define BRC_TIMEOUT (HZ * 5)
32
33 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
34  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
35  * ourselves and we don't want to hold the latter over a potentially long
36  * period of time.) */
37 static DEFINE_MUTEX(brc_serial);
38
39 /* Userspace communication. */
40 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
41 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
42 static struct sk_buff *brc_reply;    /* Reply from userspace. */
43 static u32 brc_seq;                  /* Sequence number for current op. */
44
45 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
46 static int brc_send_simple_command(struct sk_buff *);
47
48 static int
49 get_dp_ifindices(int *indices, int num)
50 {
51         int i, index = 0;
52
53         rcu_read_lock();
54         for (i=0; i < ODP_MAX && index < num; i++) {
55                 struct datapath *dp = get_dp(i);
56                 if (!dp)
57                         continue;
58                 indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
59         }
60         rcu_read_unlock();
61
62         return index;
63 }
64
65 static void
66 get_port_ifindices(struct datapath *dp, int *ifindices, int num)
67 {
68         struct net_bridge_port *p;
69
70         rcu_read_lock();
71         list_for_each_entry_rcu (p, &dp->port_list, node) {
72                 if (p->port_no < num)
73                         ifindices[p->port_no] = p->dev->ifindex;
74         }
75         rcu_read_unlock();
76 }
77
78 static struct sk_buff *
79 brc_make_request(int op, const char *bridge, const char *port)
80 {
81         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
82         if (!skb)
83                 goto error;
84
85         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
86         NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
87         if (port)
88                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
89         return skb;
90
91 nla_put_failure:
92         kfree_skb(skb);
93 error:
94         return NULL;
95 }
96
97 static int brc_send_simple_command(struct sk_buff *request)
98 {
99         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
100         struct sk_buff *reply;
101         int error;
102
103         reply = brc_send_command(request, attrs);
104         if (IS_ERR(reply))
105                 return PTR_ERR(reply);
106
107         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
108         kfree_skb(reply);
109         return -error;
110 }
111
112 static int brc_add_del_bridge(char __user *uname, int add)
113 {
114         struct sk_buff *request;
115         char name[IFNAMSIZ];
116
117         if (copy_from_user(name, uname, IFNAMSIZ))
118                 return -EFAULT;
119
120         name[IFNAMSIZ - 1] = 0;
121         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
122                                    name, NULL);
123         if (!request)
124                 return -ENOMEM;
125
126         return brc_send_simple_command(request);
127 }
128
129 static int brc_get_bridges(int __user *uindices, int n)
130 {
131         int *indices;
132         int ret;
133
134         if (n >= 2048)
135                 return -ENOMEM;
136
137         indices = kcalloc(n, sizeof(int), GFP_KERNEL);
138         if (indices == NULL)
139                 return -ENOMEM;
140
141         n = get_dp_ifindices(indices, n);
142
143         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
144
145         kfree(indices);
146         return ret;
147 }
148
149 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
150 static int
151 old_deviceless(void __user *uarg)
152 {
153         unsigned long args[3];
154
155         if (copy_from_user(args, uarg, sizeof(args)))
156                 return -EFAULT;
157
158         switch (args[0]) {
159         case BRCTL_GET_BRIDGES:
160                 return brc_get_bridges((int __user *)args[1], args[2]);
161
162         case BRCTL_ADD_BRIDGE:
163                 return brc_add_del_bridge((void __user *)args[1], 1);
164         case BRCTL_DEL_BRIDGE:
165                 return brc_add_del_bridge((void __user *)args[1], 0);
166         }
167
168         return -EOPNOTSUPP;
169 }
170
171 /* Called with the br_ioctl_mutex. */
172 static int
173 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
174 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
175 #else
176 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
177 #endif
178 {
179         switch (cmd) {
180         case SIOCGIFBR:
181         case SIOCSIFBR:
182                 return old_deviceless(uarg);
183
184         case SIOCBRADDBR:
185                 return brc_add_del_bridge(uarg, 1);
186         case SIOCBRDELBR:
187                 return brc_add_del_bridge(uarg, 0);
188         }
189
190         return -EOPNOTSUPP;
191 }
192
193 static int
194 brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
195 {
196         struct sk_buff *request;
197         struct net_device *port;
198         int err;
199
200         port = __dev_get_by_index(&init_net, port_ifindex);
201         if (!port)
202                 return -EINVAL;
203
204         /* Save name of dev and port because there's a race between the
205          * rtnl_unlock() and the brc_send_simple_command(). */
206         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
207                                    dev->name, port->name);
208         if (!request)
209                 return -ENOMEM;
210
211         rtnl_unlock();
212         err = brc_send_simple_command(request);
213         rtnl_lock();
214
215         return err;
216 }
217
218 static int
219 brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
220 {
221         struct __bridge_info b;
222         u64 id = 0;
223         int i;
224
225         memset(&b, 0, sizeof(struct __bridge_info));
226
227         for (i=0; i<ETH_ALEN; i++)
228                 id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
229         b.bridge_id = cpu_to_be64(id);
230         b.stp_enabled = 0;
231
232         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
233                 return -EFAULT;
234
235         return 0;
236 }
237
238 static int
239 brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
240 {
241         struct dp_dev *dp_dev = netdev_priv(dev);
242         struct datapath *dp = dp_dev->dp;
243         int *indices;
244
245         if (num < 0)
246                 return -EINVAL;
247         if (num == 0)
248                 num = 256;
249         if (num > DP_MAX_PORTS)
250                 num = DP_MAX_PORTS;
251
252         indices = kcalloc(num, sizeof(int), GFP_KERNEL);
253         if (indices == NULL)
254                 return -ENOMEM;
255
256         get_port_ifindices(dp, indices, num);
257         if (copy_to_user(uindices, indices, num * sizeof(int)))
258                 num = -EFAULT;
259         kfree(indices);
260         return num;
261 }
262
263 /*
264  * Format up to a page worth of forwarding table entries
265  * userbuf -- where to copy result
266  * maxnum  -- maximum number of entries desired
267  *            (limited to a page for sanity)
268  * offset  -- number of records to skip
269  */
270 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf, 
271                                unsigned long maxnum, unsigned long offset)
272 {
273         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
274         struct sk_buff *request, *reply;
275         int retval;
276         int len;
277
278         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
279         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
280                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
281
282         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
283         if (!request)
284                 return -ENOMEM;
285         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
286         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
287
288         rtnl_unlock();
289         reply = brc_send_command(request, attrs);
290         retval = PTR_ERR(reply);
291         if (IS_ERR(reply))
292                 goto exit;
293
294         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
295         if (retval < 0)
296                 goto exit_free_skb;
297
298         retval = -EINVAL;
299         if (!attrs[BRC_GENL_A_FDB_DATA])
300                 goto exit_free_skb;
301         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
302         if (len % sizeof(struct __fdb_entry) ||
303             len / sizeof(struct __fdb_entry) > maxnum)
304                 goto exit_free_skb;
305
306         retval = len / sizeof(struct __fdb_entry);
307         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
308                 retval = -EFAULT;
309
310 exit_free_skb:
311         kfree_skb(reply);
312 exit:
313         rtnl_lock();
314         return retval;
315
316 nla_put_failure:
317         kfree_skb(request);
318         return -ENOMEM;
319 }
320
321 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
322 static int
323 old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
324 {
325         unsigned long args[4];
326
327         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
328                 return -EFAULT;
329
330         switch (args[0]) {
331         case BRCTL_ADD_IF:
332                 return brc_add_del_port(dev, args[1], 1);
333         case BRCTL_DEL_IF:
334                 return brc_add_del_port(dev, args[1], 0);
335
336         case BRCTL_GET_BRIDGE_INFO:
337                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
338
339         case BRCTL_GET_PORT_LIST:
340                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
341
342         case BRCTL_GET_FDB_ENTRIES:
343                 return brc_get_fdb_entries(dev, (void __user *)args[1],
344                                            args[2], args[3]);
345         }
346
347         return -EOPNOTSUPP;
348 }
349
350 /* Called with the rtnl_lock. */
351 static int
352 brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
353 {
354         int err;
355
356         switch (cmd) {
357                 case SIOCDEVPRIVATE:
358                         err = old_dev_ioctl(dev, rq, cmd);
359                         break;
360
361                 case SIOCBRADDIF:
362                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
363                 case SIOCBRDELIF:
364                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
365
366                 default:
367                         err = -EOPNOTSUPP;
368                         break;
369         }
370
371         return err;
372 }
373
374
375 static struct genl_family brc_genl_family = {
376         .id = GENL_ID_GENERATE,
377         .hdrsize = 0,
378         .name = BRC_GENL_FAMILY_NAME,
379         .version = 1,
380         .maxattr = BRC_GENL_A_MAX,
381 };
382
383 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
384 {
385         int err = -EINVAL;
386         struct sk_buff *ans_skb;
387         void *data;
388
389         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
390         if (!ans_skb) 
391                 return -ENOMEM;
392
393         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
394                                  0, BRC_GENL_C_QUERY_MC);
395         if (data == NULL) {
396                 err = -ENOMEM;
397                 goto err;
398         }
399         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
400
401         genlmsg_end(ans_skb, data);
402         return genlmsg_reply(ans_skb, info);
403
404 err:
405 nla_put_failure:
406         kfree_skb(ans_skb);
407         return err;
408 }
409
410 static struct genl_ops brc_genl_ops_query_dp = {
411         .cmd = BRC_GENL_C_QUERY_MC,
412         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
413         .policy = NULL,
414         .doit = brc_genl_query,
415         .dumpit = NULL
416 };
417
418 /* Attribute policy: what each attribute may contain.  */
419 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
420         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
421
422         [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
423         [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
424         [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
425
426         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
427 };
428
429 static int
430 brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
431 {
432         unsigned long int flags;
433         int err;
434
435         if (!info->attrs[BRC_GENL_A_ERR_CODE])
436                 return -EINVAL;
437
438         skb = skb_clone(skb, GFP_KERNEL);
439         if (!skb)
440                 return -ENOMEM;
441
442         spin_lock_irqsave(&brc_lock, flags);
443         if (brc_seq == info->snd_seq) {
444                 brc_seq++;
445
446                 if (brc_reply)
447                         kfree_skb(brc_reply);
448                 brc_reply = skb;
449
450                 complete(&brc_done);
451                 err = 0;
452         } else {
453                 kfree_skb(skb);
454                 err = -ESTALE;
455         }
456         spin_unlock_irqrestore(&brc_lock, flags);
457
458         return err;
459 }
460
461 static struct genl_ops brc_genl_ops_dp_result = {
462         .cmd = BRC_GENL_C_DP_RESULT,
463         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
464         .policy = brc_genl_policy,
465         .doit = brc_genl_dp_result,
466         .dumpit = NULL
467 };
468
469 static struct genl_ops brc_genl_ops_set_proc = {
470         .cmd = BRC_GENL_C_SET_PROC,
471         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
472         .policy = brc_genl_policy,
473         .doit = brc_genl_set_proc,
474         .dumpit = NULL
475 };
476
477 static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr **attrs)
478 {
479         unsigned long int flags;
480         struct sk_buff *reply;
481         int error;
482
483         mutex_lock(&brc_serial);
484
485         /* Increment sequence number first, so that we ignore any replies
486          * to stale requests. */
487         spin_lock_irqsave(&brc_lock, flags);
488         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
489         INIT_COMPLETION(brc_done);
490         spin_unlock_irqrestore(&brc_lock, flags);
491
492         nlmsg_end(request, nlmsg_hdr(request));
493
494         /* Send message. */
495         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
496         if (error < 0)
497                 goto error;
498
499         /* Wait for reply. */
500         error = -ETIMEDOUT;
501         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
502                 goto error;
503
504         /* Grab reply. */
505         spin_lock_irqsave(&brc_lock, flags);
506         reply = brc_reply;
507         brc_reply = NULL;
508         spin_unlock_irqrestore(&brc_lock, flags);
509
510         mutex_unlock(&brc_serial);
511
512         /* Re-parse message.  Can't fail, since it parsed correctly once
513          * already. */
514         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
515                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
516         WARN_ON(error);
517
518         return reply;
519
520 error:
521         mutex_unlock(&brc_serial);
522         return ERR_PTR(error);
523 }
524
525 static int 
526 __init brc_init(void)
527 {
528         int i;
529         int err;
530
531         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
532
533         rcu_read_lock();
534         for (i=0; i<ODP_MAX; i++) {
535                 if (get_dp(i)) {
536                         rcu_read_unlock();
537                         printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
538                         return -EEXIST;
539                 }
540         }
541         rcu_read_unlock();
542
543         /* Set the bridge ioctl handler */
544         brioctl_set(brc_ioctl_deviceless_stub);
545
546         /* Set the openvswitch_mod device ioctl handler */
547         dp_ioctl_hook = brc_dev_ioctl;
548
549         /* Randomize the initial sequence number.  This is not a security
550          * feature; it only helps avoid crossed wires between userspace and
551          * the kernel when the module is unloaded and reloaded. */
552         brc_seq = net_random();
553
554         /* Register generic netlink family to communicate changes to
555          * userspace. */
556         err = genl_register_family(&brc_genl_family);
557         if (err)
558                 goto error;
559
560         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
561         if (err != 0) 
562                 goto err_unregister;
563
564         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
565         if (err != 0) 
566                 goto err_unregister;
567
568         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
569         if (err != 0) 
570                 goto err_unregister;
571
572         strcpy(brc_mc_group.name, "brcompat");
573         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
574         if (err < 0)
575                 goto err_unregister;
576
577         return 0;
578
579 err_unregister:
580         genl_unregister_family(&brc_genl_family);
581 error:
582         printk(KERN_EMERG "brcompat: failed to install!");
583         return err;
584 }
585
586 static void 
587 brc_cleanup(void)
588 {
589         /* Unregister ioctl hooks */
590         dp_ioctl_hook = NULL;
591         brioctl_set(NULL);
592
593         genl_unregister_family(&brc_genl_family);
594         brc_procfs_exit();
595 }
596
597 module_init(brc_init);
598 module_exit(brc_cleanup);
599
600 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
601 MODULE_AUTHOR("Nicira Networks");
602 MODULE_LICENSE("GPL");