b68720bd811f3e031aab7237faeb83cd59bcc06b
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #include <linux/kernel.h>
10 #include <asm/uaccess.h>
11 #include <linux/completion.h>
12 #include <linux/etherdevice.h>
13 #include <linux/if_bridge.h>
14 #include <linux/netdevice.h>
15 #include <linux/rtnetlink.h>
16 #include <net/genetlink.h>
17
18 #include "compat.h"
19 #include "openvswitch/brcompat-netlink.h"
20 #include "brc_procfs.h"
21 #include "datapath.h"
22
23 static struct genl_family brc_genl_family;
24 static struct genl_multicast_group brc_mc_group;
25
26 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
27  * jiffies. */
28 #define BRC_TIMEOUT (HZ * 5)
29
30 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
31  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
32  * ourselves and we don't want to hold the latter over a potentially long
33  * period of time.) */
34 static DEFINE_MUTEX(brc_serial);
35
36 /* Userspace communication. */
37 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
38 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
39 static struct sk_buff *brc_reply;    /* Reply from userspace. */
40 static u32 brc_seq;                  /* Sequence number for current op. */
41
42 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
43 static int brc_send_simple_command(struct sk_buff *);
44
45 static struct sk_buff *brc_make_request(int op, const char *bridge,
46                                         const char *port)
47 {
48         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
49         if (!skb)
50                 goto error;
51
52         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
53         if (bridge)
54                 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
55         if (port)
56                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
57         return skb;
58
59 nla_put_failure:
60         kfree_skb(skb);
61 error:
62         return NULL;
63 }
64
65 static int brc_send_simple_command(struct sk_buff *request)
66 {
67         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
68         struct sk_buff *reply;
69         int error;
70
71         reply = brc_send_command(request, attrs);
72         if (IS_ERR(reply))
73                 return PTR_ERR(reply);
74
75         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
76         kfree_skb(reply);
77         return -error;
78 }
79
80 static int brc_add_del_bridge(char __user *uname, int add)
81 {
82         struct sk_buff *request;
83         char name[IFNAMSIZ];
84
85         if (copy_from_user(name, uname, IFNAMSIZ))
86                 return -EFAULT;
87
88         name[IFNAMSIZ - 1] = 0;
89         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
90                                    name, NULL);
91         if (!request)
92                 return -ENOMEM;
93
94         return brc_send_simple_command(request);
95 }
96
97 static int brc_get_indices(int op, const char *br_name,
98                            int __user *uindices, int n)
99 {
100         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
101         struct sk_buff *request, *reply;
102         int *indices;
103         int ret;
104         int len;
105
106         if (n < 0)
107                 return -EINVAL;
108         if (n >= 2048)
109                 return -ENOMEM;
110
111         request = brc_make_request(op, br_name, NULL);
112         if (!request)
113                 return -ENOMEM;
114
115         reply = brc_send_command(request, attrs);
116         ret = PTR_ERR(reply);
117         if (IS_ERR(reply))
118                 goto exit;
119
120         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
121         if (ret < 0)
122                 goto exit_free_skb;
123
124         ret = -EINVAL;
125         if (!attrs[BRC_GENL_A_IFINDEXES])
126                 goto exit_free_skb;
127
128         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
129         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
130         if (len % sizeof(int))
131                 goto exit_free_skb;
132
133         n = min_t(int, n, len / sizeof(int));
134         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
135
136 exit_free_skb:
137         kfree_skb(reply);
138 exit:
139         return ret;
140 }
141
142 /* Called with br_ioctl_mutex. */
143 static int brc_get_bridges(int __user *uindices, int n)
144 {
145         return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
146 }
147
148 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
149 static int old_deviceless(void __user *uarg)
150 {
151         unsigned long args[3];
152
153         if (copy_from_user(args, uarg, sizeof(args)))
154                 return -EFAULT;
155
156         switch (args[0]) {
157         case BRCTL_GET_BRIDGES:
158                 return brc_get_bridges((int __user *)args[1], args[2]);
159
160         case BRCTL_ADD_BRIDGE:
161                 return brc_add_del_bridge((void __user *)args[1], 1);
162         case BRCTL_DEL_BRIDGE:
163                 return brc_add_del_bridge((void __user *)args[1], 0);
164         }
165
166         return -EOPNOTSUPP;
167 }
168
169 /* Called with the br_ioctl_mutex. */
170 static int
171 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
172 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
173 #else
174 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
175 #endif
176 {
177         switch (cmd) {
178         case SIOCGIFBR:
179         case SIOCSIFBR:
180                 return old_deviceless(uarg);
181
182         case SIOCBRADDBR:
183                 return brc_add_del_bridge(uarg, 1);
184         case SIOCBRDELBR:
185                 return brc_add_del_bridge(uarg, 0);
186         }
187
188         return -EOPNOTSUPP;
189 }
190
191 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
192 {
193         struct sk_buff *request;
194         struct net_device *port;
195         int err;
196
197         port = __dev_get_by_index(&init_net, port_ifindex);
198         if (!port)
199                 return -EINVAL;
200
201         /* Save name of dev and port because there's a race between the
202          * rtnl_unlock() and the brc_send_simple_command(). */
203         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
204                                    dev->name, port->name);
205         if (!request)
206                 return -ENOMEM;
207
208         rtnl_unlock();
209         err = brc_send_simple_command(request);
210         rtnl_lock();
211
212         return err;
213 }
214
215 static int brc_get_bridge_info(struct net_device *dev,
216                                struct __bridge_info __user *ub)
217 {
218         struct __bridge_info b;
219         u64 id = 0;
220         int i;
221
222         memset(&b, 0, sizeof(struct __bridge_info));
223
224         for (i=0; i<ETH_ALEN; i++)
225                 id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
226         b.bridge_id = cpu_to_be64(id);
227         b.stp_enabled = 0;
228
229         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
230                 return -EFAULT;
231
232         return 0;
233 }
234
235 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
236                              int num)
237 {
238         int retval;
239
240         rtnl_unlock();
241         retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
242                                  uindices, num);
243         rtnl_lock();
244
245         return retval;
246 }
247
248 /*
249  * Format up to a page worth of forwarding table entries
250  * userbuf -- where to copy result
251  * maxnum  -- maximum number of entries desired
252  *            (limited to a page for sanity)
253  * offset  -- number of records to skip
254  */
255 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf, 
256                                unsigned long maxnum, unsigned long offset)
257 {
258         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
259         struct sk_buff *request, *reply;
260         int retval;
261         int len;
262
263         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
264         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
265                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
266
267         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
268         if (!request)
269                 return -ENOMEM;
270         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
271         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
272
273         rtnl_unlock();
274         reply = brc_send_command(request, attrs);
275         retval = PTR_ERR(reply);
276         if (IS_ERR(reply))
277                 goto exit;
278
279         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
280         if (retval < 0)
281                 goto exit_free_skb;
282
283         retval = -EINVAL;
284         if (!attrs[BRC_GENL_A_FDB_DATA])
285                 goto exit_free_skb;
286         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
287         if (len % sizeof(struct __fdb_entry) ||
288             len / sizeof(struct __fdb_entry) > maxnum)
289                 goto exit_free_skb;
290
291         retval = len / sizeof(struct __fdb_entry);
292         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
293                 retval = -EFAULT;
294
295 exit_free_skb:
296         kfree_skb(reply);
297 exit:
298         rtnl_lock();
299         return retval;
300
301 nla_put_failure:
302         kfree_skb(request);
303         return -ENOMEM;
304 }
305
306 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
307 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
308 {
309         unsigned long args[4];
310
311         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
312                 return -EFAULT;
313
314         switch (args[0]) {
315         case BRCTL_ADD_IF:
316                 return brc_add_del_port(dev, args[1], 1);
317         case BRCTL_DEL_IF:
318                 return brc_add_del_port(dev, args[1], 0);
319
320         case BRCTL_GET_BRIDGE_INFO:
321                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
322
323         case BRCTL_GET_PORT_LIST:
324                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
325
326         case BRCTL_GET_FDB_ENTRIES:
327                 return brc_get_fdb_entries(dev, (void __user *)args[1],
328                                            args[2], args[3]);
329         }
330
331         return -EOPNOTSUPP;
332 }
333
334 /* Called with the rtnl_lock. */
335 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
336 {
337         int err;
338
339         switch (cmd) {
340                 case SIOCDEVPRIVATE:
341                         err = old_dev_ioctl(dev, rq, cmd);
342                         break;
343
344                 case SIOCBRADDIF:
345                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
346                 case SIOCBRDELIF:
347                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
348
349                 default:
350                         err = -EOPNOTSUPP;
351                         break;
352         }
353
354         return err;
355 }
356
357
358 static struct genl_family brc_genl_family = {
359         .id = GENL_ID_GENERATE,
360         .hdrsize = 0,
361         .name = BRC_GENL_FAMILY_NAME,
362         .version = 1,
363         .maxattr = BRC_GENL_A_MAX,
364 };
365
366 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
367 {
368         int err = -EINVAL;
369         struct sk_buff *ans_skb;
370         void *data;
371
372         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
373         if (!ans_skb) 
374                 return -ENOMEM;
375
376         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
377                                  0, BRC_GENL_C_QUERY_MC);
378         if (data == NULL) {
379                 err = -ENOMEM;
380                 goto err;
381         }
382         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
383
384         genlmsg_end(ans_skb, data);
385         return genlmsg_reply(ans_skb, info);
386
387 err:
388 nla_put_failure:
389         kfree_skb(ans_skb);
390         return err;
391 }
392
393 static struct genl_ops brc_genl_ops_query_dp = {
394         .cmd = BRC_GENL_C_QUERY_MC,
395         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
396         .policy = NULL,
397         .doit = brc_genl_query,
398         .dumpit = NULL
399 };
400
401 /* Attribute policy: what each attribute may contain.  */
402 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
403         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
404
405         [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
406         [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
407         [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
408
409         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
410 };
411
412 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
413 {
414         unsigned long int flags;
415         int err;
416
417         if (!info->attrs[BRC_GENL_A_ERR_CODE])
418                 return -EINVAL;
419
420         skb = skb_clone(skb, GFP_KERNEL);
421         if (!skb)
422                 return -ENOMEM;
423
424         spin_lock_irqsave(&brc_lock, flags);
425         if (brc_seq == info->snd_seq) {
426                 brc_seq++;
427
428                 if (brc_reply)
429                         kfree_skb(brc_reply);
430                 brc_reply = skb;
431
432                 complete(&brc_done);
433                 err = 0;
434         } else {
435                 kfree_skb(skb);
436                 err = -ESTALE;
437         }
438         spin_unlock_irqrestore(&brc_lock, flags);
439
440         return err;
441 }
442
443 static struct genl_ops brc_genl_ops_dp_result = {
444         .cmd = BRC_GENL_C_DP_RESULT,
445         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
446         .policy = brc_genl_policy,
447         .doit = brc_genl_dp_result,
448         .dumpit = NULL
449 };
450
451 static struct genl_ops brc_genl_ops_set_proc = {
452         .cmd = BRC_GENL_C_SET_PROC,
453         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
454         .policy = brc_genl_policy,
455         .doit = brc_genl_set_proc,
456         .dumpit = NULL
457 };
458
459 static struct sk_buff *brc_send_command(struct sk_buff *request,
460                                         struct nlattr **attrs)
461 {
462         unsigned long int flags;
463         struct sk_buff *reply;
464         int error;
465
466         mutex_lock(&brc_serial);
467
468         /* Increment sequence number first, so that we ignore any replies
469          * to stale requests. */
470         spin_lock_irqsave(&brc_lock, flags);
471         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
472         INIT_COMPLETION(brc_done);
473         spin_unlock_irqrestore(&brc_lock, flags);
474
475         nlmsg_end(request, nlmsg_hdr(request));
476
477         /* Send message. */
478         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
479         if (error < 0)
480                 goto error;
481
482         /* Wait for reply. */
483         error = -ETIMEDOUT;
484         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
485                 printk(KERN_WARNING "brcompat: timed out waiting for userspace\n");
486                 goto error;
487     }
488
489         /* Grab reply. */
490         spin_lock_irqsave(&brc_lock, flags);
491         reply = brc_reply;
492         brc_reply = NULL;
493         spin_unlock_irqrestore(&brc_lock, flags);
494
495         mutex_unlock(&brc_serial);
496
497         /* Re-parse message.  Can't fail, since it parsed correctly once
498          * already. */
499         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
500                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
501         WARN_ON(error);
502
503         return reply;
504
505 error:
506         mutex_unlock(&brc_serial);
507         return ERR_PTR(error);
508 }
509
510 static int __init brc_init(void)
511 {
512         int err;
513
514         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
515
516         /* Set the bridge ioctl handler */
517         brioctl_set(brc_ioctl_deviceless_stub);
518
519         /* Set the openvswitch_mod device ioctl handler */
520         dp_ioctl_hook = brc_dev_ioctl;
521
522         /* Randomize the initial sequence number.  This is not a security
523          * feature; it only helps avoid crossed wires between userspace and
524          * the kernel when the module is unloaded and reloaded. */
525         brc_seq = net_random();
526
527         /* Register generic netlink family to communicate changes to
528          * userspace. */
529         err = genl_register_family(&brc_genl_family);
530         if (err)
531                 goto error;
532
533         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
534         if (err != 0) 
535                 goto err_unregister;
536
537         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
538         if (err != 0) 
539                 goto err_unregister;
540
541         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
542         if (err != 0) 
543                 goto err_unregister;
544
545         strcpy(brc_mc_group.name, "brcompat");
546         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
547         if (err < 0)
548                 goto err_unregister;
549
550         return 0;
551
552 err_unregister:
553         genl_unregister_family(&brc_genl_family);
554 error:
555         printk(KERN_EMERG "brcompat: failed to install!");
556         return err;
557 }
558
559 static void brc_cleanup(void)
560 {
561         /* Unregister ioctl hooks */
562         dp_ioctl_hook = NULL;
563         brioctl_set(NULL);
564
565         genl_unregister_family(&brc_genl_family);
566         brc_procfs_exit();
567 }
568
569 module_init(brc_init);
570 module_exit(brc_cleanup);
571
572 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
573 MODULE_AUTHOR("Nicira Networks");
574 MODULE_LICENSE("GPL");