datapath: Allow jumbograms through IPv6 parsing.
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009, 2011 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/kernel.h>
12 #include <asm/uaccess.h>
13 #include <linux/completion.h>
14 #include <linux/etherdevice.h>
15 #include <linux/if_bridge.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
19
20 #include "openvswitch/brcompat-netlink.h"
21 #include "datapath.h"
22
23 static struct genl_family brc_genl_family;
24 static struct genl_multicast_group brc_mc_group;
25
26 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
27  * jiffies. */
28 #define BRC_TIMEOUT (HZ * 5)
29
30 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
31  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
32  * ourselves and we don't want to hold the latter over a potentially long
33  * period of time.) */
34 static DEFINE_MUTEX(brc_serial);
35
36 /* Userspace communication. */
37 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
38 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
39 static struct sk_buff *brc_reply;    /* Reply from userspace. */
40 static u32 brc_seq;                  /* Sequence number for current op. */
41
42 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
43 static int brc_send_simple_command(struct sk_buff *);
44
45 static struct sk_buff *brc_make_request(int op, const char *bridge,
46                                         const char *port)
47 {
48         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
49         if (!skb)
50                 goto error;
51
52         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
53         if (bridge)
54                 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
55         if (port)
56                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
57         return skb;
58
59 nla_put_failure:
60         kfree_skb(skb);
61 error:
62         return NULL;
63 }
64
65 static int brc_send_simple_command(struct sk_buff *request)
66 {
67         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
68         struct sk_buff *reply;
69         int error;
70
71         reply = brc_send_command(request, attrs);
72         if (IS_ERR(reply))
73                 return PTR_ERR(reply);
74
75         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
76         kfree_skb(reply);
77         return -error;
78 }
79
80 static int brc_add_del_bridge(char __user *uname, int add)
81 {
82         struct sk_buff *request;
83         char name[IFNAMSIZ];
84
85         if (!capable(CAP_NET_ADMIN))
86                 return -EPERM;
87
88         if (copy_from_user(name, uname, IFNAMSIZ))
89                 return -EFAULT;
90
91         name[IFNAMSIZ - 1] = 0;
92         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
93                                    name, NULL);
94         if (!request)
95                 return -ENOMEM;
96
97         return brc_send_simple_command(request);
98 }
99
100 static int brc_get_indices(int op, const char *br_name,
101                            int __user *uindices, int n)
102 {
103         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
104         struct sk_buff *request, *reply;
105         int *indices;
106         int ret;
107         int len;
108
109         if (n < 0)
110                 return -EINVAL;
111         if (n >= 2048)
112                 return -ENOMEM;
113
114         request = brc_make_request(op, br_name, NULL);
115         if (!request)
116                 return -ENOMEM;
117
118         reply = brc_send_command(request, attrs);
119         ret = PTR_ERR(reply);
120         if (IS_ERR(reply))
121                 goto exit;
122
123         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
124         if (ret < 0)
125                 goto exit_free_skb;
126
127         ret = -EINVAL;
128         if (!attrs[BRC_GENL_A_IFINDEXES])
129                 goto exit_free_skb;
130
131         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
132         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
133         if (len % sizeof(int))
134                 goto exit_free_skb;
135
136         n = min_t(int, n, len / sizeof(int));
137         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
138
139 exit_free_skb:
140         kfree_skb(reply);
141 exit:
142         return ret;
143 }
144
145 /* Called with br_ioctl_mutex. */
146 static int brc_get_bridges(int __user *uindices, int n)
147 {
148         return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
149 }
150
151 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
152 static int old_deviceless(void __user *uarg)
153 {
154         unsigned long args[3];
155
156         if (copy_from_user(args, uarg, sizeof(args)))
157                 return -EFAULT;
158
159         switch (args[0]) {
160         case BRCTL_GET_BRIDGES:
161                 return brc_get_bridges((int __user *)args[1], args[2]);
162
163         case BRCTL_ADD_BRIDGE:
164                 return brc_add_del_bridge((void __user *)args[1], 1);
165         case BRCTL_DEL_BRIDGE:
166                 return brc_add_del_bridge((void __user *)args[1], 0);
167         }
168
169         return -EOPNOTSUPP;
170 }
171
172 /* Called with the br_ioctl_mutex. */
173 static int
174 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
175 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
176 #else
177 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
178 #endif
179 {
180         switch (cmd) {
181         case SIOCGIFBR:
182         case SIOCSIFBR:
183                 return old_deviceless(uarg);
184
185         case SIOCBRADDBR:
186                 return brc_add_del_bridge(uarg, 1);
187         case SIOCBRDELBR:
188                 return brc_add_del_bridge(uarg, 0);
189         }
190
191         return -EOPNOTSUPP;
192 }
193
194 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
195 {
196         struct sk_buff *request;
197         struct net_device *port;
198         int err;
199
200         if (!capable(CAP_NET_ADMIN))
201                 return -EPERM;
202
203         port = __dev_get_by_index(&init_net, port_ifindex);
204         if (!port)
205                 return -EINVAL;
206
207         /* Save name of dev and port because there's a race between the
208          * rtnl_unlock() and the brc_send_simple_command(). */
209         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
210                                    dev->name, port->name);
211         if (!request)
212                 return -ENOMEM;
213
214         rtnl_unlock();
215         err = brc_send_simple_command(request);
216         rtnl_lock();
217
218         return err;
219 }
220
221 static int brc_get_bridge_info(struct net_device *dev,
222                                struct __bridge_info __user *ub)
223 {
224         struct __bridge_info b;
225
226         memset(&b, 0, sizeof(struct __bridge_info));
227
228         /* First two bytes are the priority, which we should skip.  This comes
229          * from struct bridge_id in br_private.h, which is unavailable to us.
230          */
231         memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
232         b.stp_enabled = 0;
233
234         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
235                 return -EFAULT;
236
237         return 0;
238 }
239
240 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
241                              int num)
242 {
243         int retval;
244
245         rtnl_unlock();
246         retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
247                                  uindices, num);
248         rtnl_lock();
249
250         return retval;
251 }
252
253 /*
254  * Format up to a page worth of forwarding table entries
255  * userbuf -- where to copy result
256  * maxnum  -- maximum number of entries desired
257  *            (limited to a page for sanity)
258  * offset  -- number of records to skip
259  */
260 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
261                                unsigned long maxnum, unsigned long offset)
262 {
263         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
264         struct sk_buff *request, *reply;
265         int retval;
266         int len;
267
268         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
269         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
270                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
271
272         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
273         if (!request)
274                 return -ENOMEM;
275         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
276         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
277
278         rtnl_unlock();
279         reply = brc_send_command(request, attrs);
280         retval = PTR_ERR(reply);
281         if (IS_ERR(reply))
282                 goto exit;
283
284         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
285         if (retval < 0)
286                 goto exit_free_skb;
287
288         retval = -EINVAL;
289         if (!attrs[BRC_GENL_A_FDB_DATA])
290                 goto exit_free_skb;
291         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
292         if (len % sizeof(struct __fdb_entry) ||
293             len / sizeof(struct __fdb_entry) > maxnum)
294                 goto exit_free_skb;
295
296         retval = len / sizeof(struct __fdb_entry);
297         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
298                 retval = -EFAULT;
299
300 exit_free_skb:
301         kfree_skb(reply);
302 exit:
303         rtnl_lock();
304         return retval;
305
306 nla_put_failure:
307         kfree_skb(request);
308         return -ENOMEM;
309 }
310
311 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
312 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
313 {
314         unsigned long args[4];
315
316         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
317                 return -EFAULT;
318
319         switch (args[0]) {
320         case BRCTL_ADD_IF:
321                 return brc_add_del_port(dev, args[1], 1);
322         case BRCTL_DEL_IF:
323                 return brc_add_del_port(dev, args[1], 0);
324
325         case BRCTL_GET_BRIDGE_INFO:
326                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
327
328         case BRCTL_GET_PORT_LIST:
329                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
330
331         case BRCTL_GET_FDB_ENTRIES:
332                 return brc_get_fdb_entries(dev, (void __user *)args[1],
333                                            args[2], args[3]);
334         }
335
336         return -EOPNOTSUPP;
337 }
338
339 /* Called with the rtnl_lock. */
340 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
341 {
342         int err;
343
344         switch (cmd) {
345                 case SIOCDEVPRIVATE:
346                         err = old_dev_ioctl(dev, rq, cmd);
347                         break;
348
349                 case SIOCBRADDIF:
350                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
351                 case SIOCBRDELIF:
352                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
353
354                 default:
355                         err = -EOPNOTSUPP;
356                         break;
357         }
358
359         return err;
360 }
361
362
363 static struct genl_family brc_genl_family = {
364         .id = GENL_ID_GENERATE,
365         .hdrsize = 0,
366         .name = BRC_GENL_FAMILY_NAME,
367         .version = 1,
368         .maxattr = BRC_GENL_A_MAX,
369 };
370
371 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
372 {
373         int err = -EINVAL;
374         struct sk_buff *ans_skb;
375         void *data;
376
377         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
378         if (!ans_skb)
379                 return -ENOMEM;
380
381         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
382                                  0, BRC_GENL_C_QUERY_MC);
383         if (data == NULL) {
384                 err = -ENOMEM;
385                 goto err;
386         }
387         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
388
389         genlmsg_end(ans_skb, data);
390         return genlmsg_reply(ans_skb, info);
391
392 err:
393 nla_put_failure:
394         kfree_skb(ans_skb);
395         return err;
396 }
397
398 /* Attribute policy: what each attribute may contain.  */
399 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
400         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
401         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
402 };
403
404 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
405 {
406         unsigned long int flags;
407         int err;
408
409         if (!info->attrs[BRC_GENL_A_ERR_CODE])
410                 return -EINVAL;
411
412         skb = skb_clone(skb, GFP_KERNEL);
413         if (!skb)
414                 return -ENOMEM;
415
416         spin_lock_irqsave(&brc_lock, flags);
417         if (brc_seq == info->snd_seq) {
418                 brc_seq++;
419
420                 kfree_skb(brc_reply);
421                 brc_reply = skb;
422
423                 complete(&brc_done);
424                 err = 0;
425         } else {
426                 kfree_skb(skb);
427                 err = -ESTALE;
428         }
429         spin_unlock_irqrestore(&brc_lock, flags);
430
431         return err;
432 }
433
434 static struct genl_ops brc_genl_ops[] = {
435         { .cmd = BRC_GENL_C_QUERY_MC,
436           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
437           .policy = NULL,
438           .doit = brc_genl_query,
439         },
440         { .cmd = BRC_GENL_C_DP_RESULT,
441           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
442           .policy = brc_genl_policy,
443           .doit = brc_genl_dp_result,
444         },
445 };
446
447 static struct sk_buff *brc_send_command(struct sk_buff *request,
448                                         struct nlattr **attrs)
449 {
450         unsigned long int flags;
451         struct sk_buff *reply;
452         int error;
453
454         mutex_lock(&brc_serial);
455
456         /* Increment sequence number first, so that we ignore any replies
457          * to stale requests. */
458         spin_lock_irqsave(&brc_lock, flags);
459         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
460         INIT_COMPLETION(brc_done);
461         spin_unlock_irqrestore(&brc_lock, flags);
462
463         nlmsg_end(request, nlmsg_hdr(request));
464
465         /* Send message. */
466         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
467         if (error < 0)
468                 goto error;
469
470         /* Wait for reply. */
471         error = -ETIMEDOUT;
472         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
473                 pr_warn("timed out waiting for userspace\n");
474                 goto error;
475     }
476
477         /* Grab reply. */
478         spin_lock_irqsave(&brc_lock, flags);
479         reply = brc_reply;
480         brc_reply = NULL;
481         spin_unlock_irqrestore(&brc_lock, flags);
482
483         mutex_unlock(&brc_serial);
484
485         /* Re-parse message.  Can't fail, since it parsed correctly once
486          * already. */
487         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
488                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
489         WARN_ON(error);
490
491         return reply;
492
493 error:
494         mutex_unlock(&brc_serial);
495         return ERR_PTR(error);
496 }
497
498 static int __init brc_init(void)
499 {
500         int err;
501
502         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
503
504         /* Set the bridge ioctl handler */
505         brioctl_set(brc_ioctl_deviceless_stub);
506
507         /* Set the openvswitch_mod device ioctl handler */
508         dp_ioctl_hook = brc_dev_ioctl;
509
510         /* Randomize the initial sequence number.  This is not a security
511          * feature; it only helps avoid crossed wires between userspace and
512          * the kernel when the module is unloaded and reloaded. */
513         brc_seq = net_random();
514
515         /* Register generic netlink family to communicate changes to
516          * userspace. */
517         err = genl_register_family_with_ops(&brc_genl_family,
518                                             brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
519         if (err)
520                 goto error;
521
522         strcpy(brc_mc_group.name, "brcompat");
523         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
524         if (err < 0)
525                 goto err_unregister;
526
527         return 0;
528
529 err_unregister:
530         genl_unregister_family(&brc_genl_family);
531 error:
532         pr_emerg("failed to install!\n");
533         return err;
534 }
535
536 static void brc_cleanup(void)
537 {
538         /* Unregister ioctl hooks */
539         dp_ioctl_hook = NULL;
540         brioctl_set(NULL);
541
542         genl_unregister_family(&brc_genl_family);
543 }
544
545 module_init(brc_init);
546 module_exit(brc_cleanup);
547
548 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
549 MODULE_AUTHOR("Nicira Networks");
550 MODULE_LICENSE("GPL");