brcompat_mod: Check if user has CAP_NET_ADMIN in ioctl handler
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/kernel.h>
12 #include <asm/uaccess.h>
13 #include <linux/completion.h>
14 #include <linux/etherdevice.h>
15 #include <linux/if_bridge.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
19
20 #include "compat.h"
21 #include "openvswitch/brcompat-netlink.h"
22 #include "brc_procfs.h"
23 #include "datapath.h"
24
25 static struct genl_family brc_genl_family;
26 static struct genl_multicast_group brc_mc_group;
27
28 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
29  * jiffies. */
30 #define BRC_TIMEOUT (HZ * 5)
31
32 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
33  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
34  * ourselves and we don't want to hold the latter over a potentially long
35  * period of time.) */
36 static DEFINE_MUTEX(brc_serial);
37
38 /* Userspace communication. */
39 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
40 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
41 static struct sk_buff *brc_reply;    /* Reply from userspace. */
42 static u32 brc_seq;                  /* Sequence number for current op. */
43
44 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
45 static int brc_send_simple_command(struct sk_buff *);
46
47 static struct sk_buff *brc_make_request(int op, const char *bridge,
48                                         const char *port)
49 {
50         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
51         if (!skb)
52                 goto error;
53
54         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
55         if (bridge)
56                 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
57         if (port)
58                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
59         return skb;
60
61 nla_put_failure:
62         kfree_skb(skb);
63 error:
64         return NULL;
65 }
66
67 static int brc_send_simple_command(struct sk_buff *request)
68 {
69         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
70         struct sk_buff *reply;
71         int error;
72
73         reply = brc_send_command(request, attrs);
74         if (IS_ERR(reply))
75                 return PTR_ERR(reply);
76
77         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
78         kfree_skb(reply);
79         return -error;
80 }
81
82 static int brc_add_del_bridge(char __user *uname, int add)
83 {
84         struct sk_buff *request;
85         char name[IFNAMSIZ];
86
87         if (!capable(CAP_NET_ADMIN))
88                 return -EPERM;
89
90         if (copy_from_user(name, uname, IFNAMSIZ))
91                 return -EFAULT;
92
93         name[IFNAMSIZ - 1] = 0;
94         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
95                                    name, NULL);
96         if (!request)
97                 return -ENOMEM;
98
99         return brc_send_simple_command(request);
100 }
101
102 static int brc_get_indices(int op, const char *br_name,
103                            int __user *uindices, int n)
104 {
105         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
106         struct sk_buff *request, *reply;
107         int *indices;
108         int ret;
109         int len;
110
111         if (n < 0)
112                 return -EINVAL;
113         if (n >= 2048)
114                 return -ENOMEM;
115
116         request = brc_make_request(op, br_name, NULL);
117         if (!request)
118                 return -ENOMEM;
119
120         reply = brc_send_command(request, attrs);
121         ret = PTR_ERR(reply);
122         if (IS_ERR(reply))
123                 goto exit;
124
125         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
126         if (ret < 0)
127                 goto exit_free_skb;
128
129         ret = -EINVAL;
130         if (!attrs[BRC_GENL_A_IFINDEXES])
131                 goto exit_free_skb;
132
133         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
134         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
135         if (len % sizeof(int))
136                 goto exit_free_skb;
137
138         n = min_t(int, n, len / sizeof(int));
139         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
140
141 exit_free_skb:
142         kfree_skb(reply);
143 exit:
144         return ret;
145 }
146
147 /* Called with br_ioctl_mutex. */
148 static int brc_get_bridges(int __user *uindices, int n)
149 {
150         return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
151 }
152
153 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
154 static int old_deviceless(void __user *uarg)
155 {
156         unsigned long args[3];
157
158         if (copy_from_user(args, uarg, sizeof(args)))
159                 return -EFAULT;
160
161         switch (args[0]) {
162         case BRCTL_GET_BRIDGES:
163                 return brc_get_bridges((int __user *)args[1], args[2]);
164
165         case BRCTL_ADD_BRIDGE:
166                 return brc_add_del_bridge((void __user *)args[1], 1);
167         case BRCTL_DEL_BRIDGE:
168                 return brc_add_del_bridge((void __user *)args[1], 0);
169         }
170
171         return -EOPNOTSUPP;
172 }
173
174 /* Called with the br_ioctl_mutex. */
175 static int
176 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
177 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
178 #else
179 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
180 #endif
181 {
182         switch (cmd) {
183         case SIOCGIFBR:
184         case SIOCSIFBR:
185                 return old_deviceless(uarg);
186
187         case SIOCBRADDBR:
188                 return brc_add_del_bridge(uarg, 1);
189         case SIOCBRDELBR:
190                 return brc_add_del_bridge(uarg, 0);
191         }
192
193         return -EOPNOTSUPP;
194 }
195
196 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
197 {
198         struct sk_buff *request;
199         struct net_device *port;
200         int err;
201
202         if (!capable(CAP_NET_ADMIN))
203                 return -EPERM;
204
205         port = __dev_get_by_index(&init_net, port_ifindex);
206         if (!port)
207                 return -EINVAL;
208
209         /* Save name of dev and port because there's a race between the
210          * rtnl_unlock() and the brc_send_simple_command(). */
211         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
212                                    dev->name, port->name);
213         if (!request)
214                 return -ENOMEM;
215
216         rtnl_unlock();
217         err = brc_send_simple_command(request);
218         rtnl_lock();
219
220         return err;
221 }
222
223 static int brc_get_bridge_info(struct net_device *dev,
224                                struct __bridge_info __user *ub)
225 {
226         struct __bridge_info b;
227         u64 id = 0;
228         int i;
229
230         memset(&b, 0, sizeof(struct __bridge_info));
231
232         for (i=0; i<ETH_ALEN; i++)
233                 id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
234         b.bridge_id = cpu_to_be64(id);
235         b.stp_enabled = 0;
236
237         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
238                 return -EFAULT;
239
240         return 0;
241 }
242
243 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
244                              int num)
245 {
246         int retval;
247
248         rtnl_unlock();
249         retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
250                                  uindices, num);
251         rtnl_lock();
252
253         return retval;
254 }
255
256 /*
257  * Format up to a page worth of forwarding table entries
258  * userbuf -- where to copy result
259  * maxnum  -- maximum number of entries desired
260  *            (limited to a page for sanity)
261  * offset  -- number of records to skip
262  */
263 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
264                                unsigned long maxnum, unsigned long offset)
265 {
266         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
267         struct sk_buff *request, *reply;
268         int retval;
269         int len;
270
271         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
272         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
273                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
274
275         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
276         if (!request)
277                 return -ENOMEM;
278         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
279         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
280
281         rtnl_unlock();
282         reply = brc_send_command(request, attrs);
283         retval = PTR_ERR(reply);
284         if (IS_ERR(reply))
285                 goto exit;
286
287         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
288         if (retval < 0)
289                 goto exit_free_skb;
290
291         retval = -EINVAL;
292         if (!attrs[BRC_GENL_A_FDB_DATA])
293                 goto exit_free_skb;
294         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
295         if (len % sizeof(struct __fdb_entry) ||
296             len / sizeof(struct __fdb_entry) > maxnum)
297                 goto exit_free_skb;
298
299         retval = len / sizeof(struct __fdb_entry);
300         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
301                 retval = -EFAULT;
302
303 exit_free_skb:
304         kfree_skb(reply);
305 exit:
306         rtnl_lock();
307         return retval;
308
309 nla_put_failure:
310         kfree_skb(request);
311         return -ENOMEM;
312 }
313
314 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
315 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
316 {
317         unsigned long args[4];
318
319         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
320                 return -EFAULT;
321
322         switch (args[0]) {
323         case BRCTL_ADD_IF:
324                 return brc_add_del_port(dev, args[1], 1);
325         case BRCTL_DEL_IF:
326                 return brc_add_del_port(dev, args[1], 0);
327
328         case BRCTL_GET_BRIDGE_INFO:
329                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
330
331         case BRCTL_GET_PORT_LIST:
332                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
333
334         case BRCTL_GET_FDB_ENTRIES:
335                 return brc_get_fdb_entries(dev, (void __user *)args[1],
336                                            args[2], args[3]);
337         }
338
339         return -EOPNOTSUPP;
340 }
341
342 /* Called with the rtnl_lock. */
343 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
344 {
345         int err;
346
347         switch (cmd) {
348                 case SIOCDEVPRIVATE:
349                         err = old_dev_ioctl(dev, rq, cmd);
350                         break;
351
352                 case SIOCBRADDIF:
353                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
354                 case SIOCBRDELIF:
355                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
356
357                 default:
358                         err = -EOPNOTSUPP;
359                         break;
360         }
361
362         return err;
363 }
364
365
366 static struct genl_family brc_genl_family = {
367         .id = GENL_ID_GENERATE,
368         .hdrsize = 0,
369         .name = BRC_GENL_FAMILY_NAME,
370         .version = 1,
371         .maxattr = BRC_GENL_A_MAX,
372 };
373
374 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
375 {
376         int err = -EINVAL;
377         struct sk_buff *ans_skb;
378         void *data;
379
380         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
381         if (!ans_skb)
382                 return -ENOMEM;
383
384         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
385                                  0, BRC_GENL_C_QUERY_MC);
386         if (data == NULL) {
387                 err = -ENOMEM;
388                 goto err;
389         }
390         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
391
392         genlmsg_end(ans_skb, data);
393         return genlmsg_reply(ans_skb, info);
394
395 err:
396 nla_put_failure:
397         kfree_skb(ans_skb);
398         return err;
399 }
400
401 static struct genl_ops brc_genl_ops_query_dp = {
402         .cmd = BRC_GENL_C_QUERY_MC,
403         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
404         .policy = NULL,
405         .doit = brc_genl_query,
406         .dumpit = NULL
407 };
408
409 /* Attribute policy: what each attribute may contain.  */
410 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
411         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
412
413         [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
414         [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
415         [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
416
417         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
418 };
419
420 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
421 {
422         unsigned long int flags;
423         int err;
424
425         if (!info->attrs[BRC_GENL_A_ERR_CODE])
426                 return -EINVAL;
427
428         skb = skb_clone(skb, GFP_KERNEL);
429         if (!skb)
430                 return -ENOMEM;
431
432         spin_lock_irqsave(&brc_lock, flags);
433         if (brc_seq == info->snd_seq) {
434                 brc_seq++;
435
436                 kfree_skb(brc_reply);
437                 brc_reply = skb;
438
439                 complete(&brc_done);
440                 err = 0;
441         } else {
442                 kfree_skb(skb);
443                 err = -ESTALE;
444         }
445         spin_unlock_irqrestore(&brc_lock, flags);
446
447         return err;
448 }
449
450 static struct genl_ops brc_genl_ops_dp_result = {
451         .cmd = BRC_GENL_C_DP_RESULT,
452         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
453         .policy = brc_genl_policy,
454         .doit = brc_genl_dp_result,
455         .dumpit = NULL
456 };
457
458 static struct genl_ops brc_genl_ops_set_proc = {
459         .cmd = BRC_GENL_C_SET_PROC,
460         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
461         .policy = brc_genl_policy,
462         .doit = brc_genl_set_proc,
463         .dumpit = NULL
464 };
465
466 static struct sk_buff *brc_send_command(struct sk_buff *request,
467                                         struct nlattr **attrs)
468 {
469         unsigned long int flags;
470         struct sk_buff *reply;
471         int error;
472
473         mutex_lock(&brc_serial);
474
475         /* Increment sequence number first, so that we ignore any replies
476          * to stale requests. */
477         spin_lock_irqsave(&brc_lock, flags);
478         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
479         INIT_COMPLETION(brc_done);
480         spin_unlock_irqrestore(&brc_lock, flags);
481
482         nlmsg_end(request, nlmsg_hdr(request));
483
484         /* Send message. */
485         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
486         if (error < 0)
487                 goto error;
488
489         /* Wait for reply. */
490         error = -ETIMEDOUT;
491         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
492                 pr_warn("timed out waiting for userspace\n");
493                 goto error;
494     }
495
496         /* Grab reply. */
497         spin_lock_irqsave(&brc_lock, flags);
498         reply = brc_reply;
499         brc_reply = NULL;
500         spin_unlock_irqrestore(&brc_lock, flags);
501
502         mutex_unlock(&brc_serial);
503
504         /* Re-parse message.  Can't fail, since it parsed correctly once
505          * already. */
506         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
507                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
508         WARN_ON(error);
509
510         return reply;
511
512 error:
513         mutex_unlock(&brc_serial);
514         return ERR_PTR(error);
515 }
516
517 static int __init brc_init(void)
518 {
519         int err;
520
521         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
522
523         /* Set the bridge ioctl handler */
524         brioctl_set(brc_ioctl_deviceless_stub);
525
526         /* Set the openvswitch_mod device ioctl handler */
527         dp_ioctl_hook = brc_dev_ioctl;
528
529         /* Randomize the initial sequence number.  This is not a security
530          * feature; it only helps avoid crossed wires between userspace and
531          * the kernel when the module is unloaded and reloaded. */
532         brc_seq = net_random();
533
534         /* Register generic netlink family to communicate changes to
535          * userspace. */
536         err = genl_register_family(&brc_genl_family);
537         if (err)
538                 goto error;
539
540         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
541         if (err != 0)
542                 goto err_unregister;
543
544         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
545         if (err != 0)
546                 goto err_unregister;
547
548         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
549         if (err != 0)
550                 goto err_unregister;
551
552         strcpy(brc_mc_group.name, "brcompat");
553         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
554         if (err < 0)
555                 goto err_unregister;
556
557         return 0;
558
559 err_unregister:
560         genl_unregister_family(&brc_genl_family);
561 error:
562         pr_emerg("failed to install!\n");
563         return err;
564 }
565
566 static void brc_cleanup(void)
567 {
568         /* Unregister ioctl hooks */
569         dp_ioctl_hook = NULL;
570         brioctl_set(NULL);
571
572         genl_unregister_family(&brc_genl_family);
573         brc_procfs_exit();
574 }
575
576 module_init(brc_init);
577 module_exit(brc_cleanup);
578
579 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
580 MODULE_AUTHOR("Nicira Networks");
581 MODULE_LICENSE("GPL");