Global replace of Nicira Networks.
[sliver-openvswitch.git] / datapath / brcompat_main.c
1 /*
2  * Copyright (c) 2007-2012 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/module.h>
22 #include <linux/kernel.h>
23 #include <linux/uaccess.h>
24 #include <linux/completion.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_bridge.h>
27 #include <linux/netdevice.h>
28 #include <linux/rtnetlink.h>
29 #include <net/genetlink.h>
30
31 #include "openvswitch/brcompat-netlink.h"
32 #include "datapath.h"
33
34 static struct genl_family brc_genl_family;
35 static struct genl_multicast_group brc_mc_group;
36
37 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
38  * jiffies. */
39 #define BRC_TIMEOUT (HZ * 5)
40
41 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
42  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
43  * ourselves and we don't want to hold the latter over a potentially long
44  * period of time.) */
45 static DEFINE_MUTEX(brc_serial);
46
47 /* Userspace communication. */
48 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
49 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
50 static struct sk_buff *brc_reply;    /* Reply from userspace. */
51 static u32 brc_seq;                  /* Sequence number for current op. */
52
53 static struct sk_buff *brc_send_command(struct net *,
54                                         struct sk_buff *,
55                                         struct nlattr **attrs);
56 static int brc_send_simple_command(struct net *, struct sk_buff *);
57
58 static struct sk_buff *brc_make_request(int op, const char *bridge,
59                                         const char *port)
60 {
61         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
62         if (!skb)
63                 goto error;
64
65         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
66
67         if (bridge && nla_put_string(skb, BRC_GENL_A_DP_NAME, bridge))
68                 goto nla_put_failure;
69         if (port && nla_put_string(skb, BRC_GENL_A_PORT_NAME, port))
70                 goto nla_put_failure;
71
72         return skb;
73
74 nla_put_failure:
75         kfree_skb(skb);
76 error:
77         return NULL;
78 }
79
80 static int brc_send_simple_command(struct net *net, struct sk_buff *request)
81 {
82         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
83         struct sk_buff *reply;
84         int error;
85
86         reply = brc_send_command(net, request, attrs);
87         if (IS_ERR(reply))
88                 return PTR_ERR(reply);
89
90         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
91         kfree_skb(reply);
92         return -error;
93 }
94
95 static int brc_add_del_bridge(struct net *net, char __user *uname, int add)
96 {
97         struct sk_buff *request;
98         char name[IFNAMSIZ];
99
100         if (!capable(CAP_NET_ADMIN))
101                 return -EPERM;
102
103         if (copy_from_user(name, uname, IFNAMSIZ))
104                 return -EFAULT;
105
106         name[IFNAMSIZ - 1] = 0;
107         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
108                                    name, NULL);
109         if (!request)
110                 return -ENOMEM;
111
112         return brc_send_simple_command(net, request);
113 }
114
115 static int brc_get_indices(struct net *net,
116                            int op, const char *br_name,
117                            int __user *uindices, int n)
118 {
119         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
120         struct sk_buff *request, *reply;
121         int *indices;
122         int ret;
123         int len;
124
125         if (n < 0)
126                 return -EINVAL;
127         if (n >= 2048)
128                 return -ENOMEM;
129
130         request = brc_make_request(op, br_name, NULL);
131         if (!request)
132                 return -ENOMEM;
133
134         reply = brc_send_command(net, request, attrs);
135         ret = PTR_ERR(reply);
136         if (IS_ERR(reply))
137                 goto exit;
138
139         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
140         if (ret < 0)
141                 goto exit_free_skb;
142
143         ret = -EINVAL;
144         if (!attrs[BRC_GENL_A_IFINDEXES])
145                 goto exit_free_skb;
146
147         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
148         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
149         if (len % sizeof(int))
150                 goto exit_free_skb;
151
152         n = min_t(int, n, len / sizeof(int));
153         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
154
155 exit_free_skb:
156         kfree_skb(reply);
157 exit:
158         return ret;
159 }
160
161 /* Called with br_ioctl_mutex. */
162 static int brc_get_bridges(struct net *net, int __user *uindices, int n)
163 {
164         return brc_get_indices(net, BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
165 }
166
167 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
168 static int old_deviceless(struct net *net, void __user *uarg)
169 {
170         unsigned long args[3];
171
172         if (copy_from_user(args, uarg, sizeof(args)))
173                 return -EFAULT;
174
175         switch (args[0]) {
176         case BRCTL_GET_BRIDGES:
177                 return brc_get_bridges(net, (int __user *)args[1], args[2]);
178
179         case BRCTL_ADD_BRIDGE:
180                 return brc_add_del_bridge(net, (void __user *)args[1], 1);
181         case BRCTL_DEL_BRIDGE:
182                 return brc_add_del_bridge(net, (void __user *)args[1], 0);
183         }
184
185         return -EOPNOTSUPP;
186 }
187
188 /* Called with the br_ioctl_mutex. */
189 static int
190 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
191 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
192 {
193         struct net *net = NULL;
194 #else
195 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
196 {
197 #endif
198         switch (cmd) {
199         case SIOCGIFBR:
200         case SIOCSIFBR:
201                 return old_deviceless(net, uarg);
202
203         case SIOCBRADDBR:
204                 return brc_add_del_bridge(net, uarg, 1);
205         case SIOCBRDELBR:
206                 return brc_add_del_bridge(net, uarg, 0);
207         }
208
209         return -EOPNOTSUPP;
210 }
211
212 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
213 {
214         struct sk_buff *request;
215         struct net_device *port;
216         int err;
217
218         if (!capable(CAP_NET_ADMIN))
219                 return -EPERM;
220
221         port = __dev_get_by_index(dev_net(dev), port_ifindex);
222         if (!port)
223                 return -EINVAL;
224
225         /* Save name of dev and port because there's a race between the
226          * rtnl_unlock() and the brc_send_simple_command(). */
227         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
228                                    dev->name, port->name);
229         if (!request)
230                 return -ENOMEM;
231
232         rtnl_unlock();
233         err = brc_send_simple_command(dev_net(dev), request);
234         rtnl_lock();
235
236         return err;
237 }
238
239 static int brc_get_bridge_info(struct net_device *dev,
240                                struct __bridge_info __user *ub)
241 {
242         struct __bridge_info b;
243
244         memset(&b, 0, sizeof(struct __bridge_info));
245
246         /* First two bytes are the priority, which we should skip.  This comes
247          * from struct bridge_id in br_private.h, which is unavailable to us.
248          */
249         memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
250         b.stp_enabled = 0;
251
252         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
253                 return -EFAULT;
254
255         return 0;
256 }
257
258 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
259                              int num)
260 {
261         int retval;
262
263         rtnl_unlock();
264         retval = brc_get_indices(dev_net(dev), BRC_GENL_C_GET_PORTS, dev->name,
265                                  uindices, num);
266         rtnl_lock();
267
268         return retval;
269 }
270
271 /*
272  * Format up to a page worth of forwarding table entries
273  * userbuf -- where to copy result
274  * maxnum  -- maximum number of entries desired
275  *            (limited to a page for sanity)
276  * offset  -- number of records to skip
277  */
278 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
279                                unsigned long maxnum, unsigned long offset)
280 {
281         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
282         struct sk_buff *request, *reply;
283         int retval;
284         int len;
285
286         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
287         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
288                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
289
290         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
291         if (!request)
292                 return -ENOMEM;
293         if (nla_put_u64(request, BRC_GENL_A_FDB_COUNT, maxnum) ||
294             nla_put_u64(request, BRC_GENL_A_FDB_SKIP, offset))
295                 goto nla_put_failure;
296
297         rtnl_unlock();
298         reply = brc_send_command(dev_net(dev), request, attrs);
299         retval = PTR_ERR(reply);
300         if (IS_ERR(reply))
301                 goto exit;
302
303         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
304         if (retval < 0)
305                 goto exit_free_skb;
306
307         retval = -EINVAL;
308         if (!attrs[BRC_GENL_A_FDB_DATA])
309                 goto exit_free_skb;
310         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
311         if (len % sizeof(struct __fdb_entry) ||
312             len / sizeof(struct __fdb_entry) > maxnum)
313                 goto exit_free_skb;
314
315         retval = len / sizeof(struct __fdb_entry);
316         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
317                 retval = -EFAULT;
318
319 exit_free_skb:
320         kfree_skb(reply);
321 exit:
322         rtnl_lock();
323         return retval;
324
325 nla_put_failure:
326         kfree_skb(request);
327         return -ENOMEM;
328 }
329
330 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
331 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
332 {
333         unsigned long args[4];
334
335         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
336                 return -EFAULT;
337
338         switch (args[0]) {
339         case BRCTL_ADD_IF:
340                 return brc_add_del_port(dev, args[1], 1);
341         case BRCTL_DEL_IF:
342                 return brc_add_del_port(dev, args[1], 0);
343
344         case BRCTL_GET_BRIDGE_INFO:
345                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
346
347         case BRCTL_GET_PORT_LIST:
348                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
349
350         case BRCTL_GET_FDB_ENTRIES:
351                 return brc_get_fdb_entries(dev, (void __user *)args[1],
352                                            args[2], args[3]);
353         }
354
355         return -EOPNOTSUPP;
356 }
357
358 /* Called with the rtnl_lock. */
359 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
360 {
361         int err;
362
363         switch (cmd) {
364         case SIOCDEVPRIVATE:
365                 err = old_dev_ioctl(dev, rq, cmd);
366                 break;
367
368         case SIOCBRADDIF:
369                 return brc_add_del_port(dev, rq->ifr_ifindex, 1);
370         case SIOCBRDELIF:
371                 return brc_add_del_port(dev, rq->ifr_ifindex, 0);
372
373         default:
374                 err = -EOPNOTSUPP;
375                 break;
376         }
377
378         return err;
379 }
380
381
382 static struct genl_family brc_genl_family = {
383         .id = GENL_ID_GENERATE,
384         .hdrsize = 0,
385         .name = BRC_GENL_FAMILY_NAME,
386         .version = 1,
387         .maxattr = BRC_GENL_A_MAX,
388          SET_NETNSOK
389 };
390
391 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
392 {
393         int err = -EINVAL;
394         struct sk_buff *ans_skb;
395         void *data;
396
397         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
398         if (!ans_skb)
399                 return -ENOMEM;
400
401         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
402                                  0, BRC_GENL_C_QUERY_MC);
403         if (data == NULL) {
404                 err = -ENOMEM;
405                 goto err;
406         }
407         if (nla_put_u32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id))
408                 goto nla_put_failure;
409
410         genlmsg_end(ans_skb, data);
411         return genlmsg_reply(ans_skb, info);
412
413 err:
414 nla_put_failure:
415         kfree_skb(ans_skb);
416         return err;
417 }
418
419 /* Attribute policy: what each attribute may contain.  */
420 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
421         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
422         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
423 };
424
425 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
426 {
427         unsigned long int flags;
428         int err;
429
430         if (!info->attrs[BRC_GENL_A_ERR_CODE])
431                 return -EINVAL;
432
433         skb = skb_clone(skb, GFP_KERNEL);
434         if (!skb)
435                 return -ENOMEM;
436
437         spin_lock_irqsave(&brc_lock, flags);
438         if (brc_seq == info->snd_seq) {
439                 brc_seq++;
440
441                 kfree_skb(brc_reply);
442                 brc_reply = skb;
443
444                 complete(&brc_done);
445                 err = 0;
446         } else {
447                 kfree_skb(skb);
448                 err = -ESTALE;
449         }
450         spin_unlock_irqrestore(&brc_lock, flags);
451
452         return err;
453 }
454
455 static struct genl_ops brc_genl_ops[] = {
456         { .cmd = BRC_GENL_C_QUERY_MC,
457           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
458           .policy = NULL,
459           .doit = brc_genl_query,
460         },
461         { .cmd = BRC_GENL_C_DP_RESULT,
462           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
463           .policy = brc_genl_policy,
464           .doit = brc_genl_dp_result,
465         },
466 };
467
468 static struct sk_buff *brc_send_command(struct net *net,
469                                         struct sk_buff *request,
470                                         struct nlattr **attrs)
471 {
472         unsigned long int flags;
473         struct sk_buff *reply;
474         int error;
475
476         mutex_lock(&brc_serial);
477
478         /* Increment sequence number first, so that we ignore any replies
479          * to stale requests. */
480         spin_lock_irqsave(&brc_lock, flags);
481         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
482         INIT_COMPLETION(brc_done);
483         spin_unlock_irqrestore(&brc_lock, flags);
484
485         nlmsg_end(request, nlmsg_hdr(request));
486
487         /* Send message. */
488         error = genlmsg_multicast_netns(net, request, 0,
489                                         brc_mc_group.id, GFP_KERNEL);
490         if (error < 0)
491                 goto error;
492
493         /* Wait for reply. */
494         error = -ETIMEDOUT;
495         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
496                 pr_warn("timed out waiting for userspace\n");
497                 goto error;
498         }
499
500         /* Grab reply. */
501         spin_lock_irqsave(&brc_lock, flags);
502         reply = brc_reply;
503         brc_reply = NULL;
504         spin_unlock_irqrestore(&brc_lock, flags);
505
506         mutex_unlock(&brc_serial);
507
508         /* Re-parse message.  Can't fail, since it parsed correctly once
509          * already. */
510         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
511                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
512         WARN_ON(error);
513
514         return reply;
515
516 error:
517         mutex_unlock(&brc_serial);
518         return ERR_PTR(error);
519 }
520
521 static int __init brc_init(void)
522 {
523         int err;
524
525         pr_info("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
526
527         /* Set the bridge ioctl handler */
528         brioctl_set(brc_ioctl_deviceless_stub);
529
530         /* Set the openvswitch device ioctl handler */
531         ovs_dp_ioctl_hook = brc_dev_ioctl;
532
533         /* Randomize the initial sequence number.  This is not a security
534          * feature; it only helps avoid crossed wires between userspace and
535          * the kernel when the module is unloaded and reloaded. */
536         brc_seq = net_random();
537
538         /* Register generic netlink family to communicate changes to
539          * userspace. */
540         err = genl_register_family_with_ops(&brc_genl_family,
541                                             brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
542         if (err)
543                 goto error;
544
545         strcpy(brc_mc_group.name, "brcompat");
546         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
547         if (err < 0)
548                 goto err_unregister;
549
550         return 0;
551
552 err_unregister:
553         genl_unregister_family(&brc_genl_family);
554 error:
555         pr_emerg("failed to install!\n");
556         return err;
557 }
558
559 static void brc_cleanup(void)
560 {
561         /* Unregister ioctl hooks */
562         ovs_dp_ioctl_hook = NULL;
563         brioctl_set(NULL);
564
565         genl_unregister_family(&brc_genl_family);
566 }
567
568 module_init(brc_init);
569 module_exit(brc_cleanup);
570
571 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
572 MODULE_AUTHOR("Nicira, Inc.");
573 MODULE_LICENSE("GPL");
574
575 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
576 /*
577  * In kernels 2.6.36 and later, Open vSwitch can safely coexist with
578  * the Linux bridge module, but it does not make sense to load both bridge and
579  * brcompat, so this prevents it.
580  */
581 BRIDGE_MUTUAL_EXCLUSION;
582 #endif