meta-flow: Correctly set destination MAC in mf_set_flow_value().
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2007-2012 Nicira Networks.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/module.h>
22 #include <linux/kernel.h>
23 #include <linux/uaccess.h>
24 #include <linux/completion.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_bridge.h>
27 #include <linux/netdevice.h>
28 #include <linux/rtnetlink.h>
29 #include <net/genetlink.h>
30
31 #include "openvswitch/brcompat-netlink.h"
32 #include "datapath.h"
33
34 static struct genl_family brc_genl_family;
35 static struct genl_multicast_group brc_mc_group;
36
37 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
38  * jiffies. */
39 #define BRC_TIMEOUT (HZ * 5)
40
41 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
42  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
43  * ourselves and we don't want to hold the latter over a potentially long
44  * period of time.) */
45 static DEFINE_MUTEX(brc_serial);
46
47 /* Userspace communication. */
48 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
49 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
50 static struct sk_buff *brc_reply;    /* Reply from userspace. */
51 static u32 brc_seq;                  /* Sequence number for current op. */
52
53 static struct sk_buff *brc_send_command(struct sk_buff *,
54                                         struct nlattr **attrs);
55 static int brc_send_simple_command(struct sk_buff *);
56
57 static struct sk_buff *brc_make_request(int op, const char *bridge,
58                                         const char *port)
59 {
60         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
61         if (!skb)
62                 goto error;
63
64         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
65         if (bridge)
66                 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
67         if (port)
68                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
69         return skb;
70
71 nla_put_failure:
72         kfree_skb(skb);
73 error:
74         return NULL;
75 }
76
77 static int brc_send_simple_command(struct sk_buff *request)
78 {
79         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
80         struct sk_buff *reply;
81         int error;
82
83         reply = brc_send_command(request, attrs);
84         if (IS_ERR(reply))
85                 return PTR_ERR(reply);
86
87         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
88         kfree_skb(reply);
89         return -error;
90 }
91
92 static int brc_add_del_bridge(char __user *uname, int add)
93 {
94         struct sk_buff *request;
95         char name[IFNAMSIZ];
96
97         if (!capable(CAP_NET_ADMIN))
98                 return -EPERM;
99
100         if (copy_from_user(name, uname, IFNAMSIZ))
101                 return -EFAULT;
102
103         name[IFNAMSIZ - 1] = 0;
104         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
105                                    name, NULL);
106         if (!request)
107                 return -ENOMEM;
108
109         return brc_send_simple_command(request);
110 }
111
112 static int brc_get_indices(int op, const char *br_name,
113                            int __user *uindices, int n)
114 {
115         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
116         struct sk_buff *request, *reply;
117         int *indices;
118         int ret;
119         int len;
120
121         if (n < 0)
122                 return -EINVAL;
123         if (n >= 2048)
124                 return -ENOMEM;
125
126         request = brc_make_request(op, br_name, NULL);
127         if (!request)
128                 return -ENOMEM;
129
130         reply = brc_send_command(request, attrs);
131         ret = PTR_ERR(reply);
132         if (IS_ERR(reply))
133                 goto exit;
134
135         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
136         if (ret < 0)
137                 goto exit_free_skb;
138
139         ret = -EINVAL;
140         if (!attrs[BRC_GENL_A_IFINDEXES])
141                 goto exit_free_skb;
142
143         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
144         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
145         if (len % sizeof(int))
146                 goto exit_free_skb;
147
148         n = min_t(int, n, len / sizeof(int));
149         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
150
151 exit_free_skb:
152         kfree_skb(reply);
153 exit:
154         return ret;
155 }
156
157 /* Called with br_ioctl_mutex. */
158 static int brc_get_bridges(int __user *uindices, int n)
159 {
160         return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
161 }
162
163 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
164 static int old_deviceless(void __user *uarg)
165 {
166         unsigned long args[3];
167
168         if (copy_from_user(args, uarg, sizeof(args)))
169                 return -EFAULT;
170
171         switch (args[0]) {
172         case BRCTL_GET_BRIDGES:
173                 return brc_get_bridges((int __user *)args[1], args[2]);
174
175         case BRCTL_ADD_BRIDGE:
176                 return brc_add_del_bridge((void __user *)args[1], 1);
177         case BRCTL_DEL_BRIDGE:
178                 return brc_add_del_bridge((void __user *)args[1], 0);
179         }
180
181         return -EOPNOTSUPP;
182 }
183
184 /* Called with the br_ioctl_mutex. */
185 static int
186 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
187 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
188 #else
189 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
190 #endif
191 {
192         switch (cmd) {
193         case SIOCGIFBR:
194         case SIOCSIFBR:
195                 return old_deviceless(uarg);
196
197         case SIOCBRADDBR:
198                 return brc_add_del_bridge(uarg, 1);
199         case SIOCBRDELBR:
200                 return brc_add_del_bridge(uarg, 0);
201         }
202
203         return -EOPNOTSUPP;
204 }
205
206 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
207 {
208         struct sk_buff *request;
209         struct net_device *port;
210         int err;
211
212         if (!capable(CAP_NET_ADMIN))
213                 return -EPERM;
214
215         port = __dev_get_by_index(&init_net, port_ifindex);
216         if (!port)
217                 return -EINVAL;
218
219         /* Save name of dev and port because there's a race between the
220          * rtnl_unlock() and the brc_send_simple_command(). */
221         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
222                                    dev->name, port->name);
223         if (!request)
224                 return -ENOMEM;
225
226         rtnl_unlock();
227         err = brc_send_simple_command(request);
228         rtnl_lock();
229
230         return err;
231 }
232
233 static int brc_get_bridge_info(struct net_device *dev,
234                                struct __bridge_info __user *ub)
235 {
236         struct __bridge_info b;
237
238         memset(&b, 0, sizeof(struct __bridge_info));
239
240         /* First two bytes are the priority, which we should skip.  This comes
241          * from struct bridge_id in br_private.h, which is unavailable to us.
242          */
243         memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
244         b.stp_enabled = 0;
245
246         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
247                 return -EFAULT;
248
249         return 0;
250 }
251
252 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
253                              int num)
254 {
255         int retval;
256
257         rtnl_unlock();
258         retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
259                                  uindices, num);
260         rtnl_lock();
261
262         return retval;
263 }
264
265 /*
266  * Format up to a page worth of forwarding table entries
267  * userbuf -- where to copy result
268  * maxnum  -- maximum number of entries desired
269  *            (limited to a page for sanity)
270  * offset  -- number of records to skip
271  */
272 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
273                                unsigned long maxnum, unsigned long offset)
274 {
275         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
276         struct sk_buff *request, *reply;
277         int retval;
278         int len;
279
280         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
281         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
282                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
283
284         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
285         if (!request)
286                 return -ENOMEM;
287         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
288         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
289
290         rtnl_unlock();
291         reply = brc_send_command(request, attrs);
292         retval = PTR_ERR(reply);
293         if (IS_ERR(reply))
294                 goto exit;
295
296         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
297         if (retval < 0)
298                 goto exit_free_skb;
299
300         retval = -EINVAL;
301         if (!attrs[BRC_GENL_A_FDB_DATA])
302                 goto exit_free_skb;
303         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
304         if (len % sizeof(struct __fdb_entry) ||
305             len / sizeof(struct __fdb_entry) > maxnum)
306                 goto exit_free_skb;
307
308         retval = len / sizeof(struct __fdb_entry);
309         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
310                 retval = -EFAULT;
311
312 exit_free_skb:
313         kfree_skb(reply);
314 exit:
315         rtnl_lock();
316         return retval;
317
318 nla_put_failure:
319         kfree_skb(request);
320         return -ENOMEM;
321 }
322
323 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
324 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
325 {
326         unsigned long args[4];
327
328         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
329                 return -EFAULT;
330
331         switch (args[0]) {
332         case BRCTL_ADD_IF:
333                 return brc_add_del_port(dev, args[1], 1);
334         case BRCTL_DEL_IF:
335                 return brc_add_del_port(dev, args[1], 0);
336
337         case BRCTL_GET_BRIDGE_INFO:
338                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
339
340         case BRCTL_GET_PORT_LIST:
341                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
342
343         case BRCTL_GET_FDB_ENTRIES:
344                 return brc_get_fdb_entries(dev, (void __user *)args[1],
345                                            args[2], args[3]);
346         }
347
348         return -EOPNOTSUPP;
349 }
350
351 /* Called with the rtnl_lock. */
352 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
353 {
354         int err;
355
356         switch (cmd) {
357         case SIOCDEVPRIVATE:
358                 err = old_dev_ioctl(dev, rq, cmd);
359                 break;
360
361         case SIOCBRADDIF:
362                 return brc_add_del_port(dev, rq->ifr_ifindex, 1);
363         case SIOCBRDELIF:
364                 return brc_add_del_port(dev, rq->ifr_ifindex, 0);
365
366         default:
367                 err = -EOPNOTSUPP;
368                 break;
369         }
370
371         return err;
372 }
373
374
375 static struct genl_family brc_genl_family = {
376         .id = GENL_ID_GENERATE,
377         .hdrsize = 0,
378         .name = BRC_GENL_FAMILY_NAME,
379         .version = 1,
380         .maxattr = BRC_GENL_A_MAX,
381 };
382
383 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
384 {
385         int err = -EINVAL;
386         struct sk_buff *ans_skb;
387         void *data;
388
389         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
390         if (!ans_skb)
391                 return -ENOMEM;
392
393         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
394                                  0, BRC_GENL_C_QUERY_MC);
395         if (data == NULL) {
396                 err = -ENOMEM;
397                 goto err;
398         }
399         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
400
401         genlmsg_end(ans_skb, data);
402         return genlmsg_reply(ans_skb, info);
403
404 err:
405 nla_put_failure:
406         kfree_skb(ans_skb);
407         return err;
408 }
409
410 /* Attribute policy: what each attribute may contain.  */
411 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
412         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
413         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
414 };
415
416 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
417 {
418         unsigned long int flags;
419         int err;
420
421         if (!info->attrs[BRC_GENL_A_ERR_CODE])
422                 return -EINVAL;
423
424         skb = skb_clone(skb, GFP_KERNEL);
425         if (!skb)
426                 return -ENOMEM;
427
428         spin_lock_irqsave(&brc_lock, flags);
429         if (brc_seq == info->snd_seq) {
430                 brc_seq++;
431
432                 kfree_skb(brc_reply);
433                 brc_reply = skb;
434
435                 complete(&brc_done);
436                 err = 0;
437         } else {
438                 kfree_skb(skb);
439                 err = -ESTALE;
440         }
441         spin_unlock_irqrestore(&brc_lock, flags);
442
443         return err;
444 }
445
446 static struct genl_ops brc_genl_ops[] = {
447         { .cmd = BRC_GENL_C_QUERY_MC,
448           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
449           .policy = NULL,
450           .doit = brc_genl_query,
451         },
452         { .cmd = BRC_GENL_C_DP_RESULT,
453           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
454           .policy = brc_genl_policy,
455           .doit = brc_genl_dp_result,
456         },
457 };
458
459 static struct sk_buff *brc_send_command(struct sk_buff *request,
460                                         struct nlattr **attrs)
461 {
462         unsigned long int flags;
463         struct sk_buff *reply;
464         int error;
465
466         mutex_lock(&brc_serial);
467
468         /* Increment sequence number first, so that we ignore any replies
469          * to stale requests. */
470         spin_lock_irqsave(&brc_lock, flags);
471         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
472         INIT_COMPLETION(brc_done);
473         spin_unlock_irqrestore(&brc_lock, flags);
474
475         nlmsg_end(request, nlmsg_hdr(request));
476
477         /* Send message. */
478         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
479         if (error < 0)
480                 goto error;
481
482         /* Wait for reply. */
483         error = -ETIMEDOUT;
484         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
485                 pr_warn("timed out waiting for userspace\n");
486                 goto error;
487         }
488
489         /* Grab reply. */
490         spin_lock_irqsave(&brc_lock, flags);
491         reply = brc_reply;
492         brc_reply = NULL;
493         spin_unlock_irqrestore(&brc_lock, flags);
494
495         mutex_unlock(&brc_serial);
496
497         /* Re-parse message.  Can't fail, since it parsed correctly once
498          * already. */
499         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
500                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
501         WARN_ON(error);
502
503         return reply;
504
505 error:
506         mutex_unlock(&brc_serial);
507         return ERR_PTR(error);
508 }
509
510 static int __init brc_init(void)
511 {
512         int err;
513
514         pr_info("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
515
516         /* Set the bridge ioctl handler */
517         brioctl_set(brc_ioctl_deviceless_stub);
518
519         /* Set the openvswitch_mod device ioctl handler */
520         ovs_dp_ioctl_hook = brc_dev_ioctl;
521
522         /* Randomize the initial sequence number.  This is not a security
523          * feature; it only helps avoid crossed wires between userspace and
524          * the kernel when the module is unloaded and reloaded. */
525         brc_seq = net_random();
526
527         /* Register generic netlink family to communicate changes to
528          * userspace. */
529         err = genl_register_family_with_ops(&brc_genl_family,
530                                             brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
531         if (err)
532                 goto error;
533
534         strcpy(brc_mc_group.name, "brcompat");
535         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
536         if (err < 0)
537                 goto err_unregister;
538
539         return 0;
540
541 err_unregister:
542         genl_unregister_family(&brc_genl_family);
543 error:
544         pr_emerg("failed to install!\n");
545         return err;
546 }
547
548 static void brc_cleanup(void)
549 {
550         /* Unregister ioctl hooks */
551         ovs_dp_ioctl_hook = NULL;
552         brioctl_set(NULL);
553
554         genl_unregister_family(&brc_genl_family);
555 }
556
557 module_init(brc_init);
558 module_exit(brc_cleanup);
559
560 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
561 MODULE_AUTHOR("Nicira Networks");
562 MODULE_LICENSE("GPL");
563
564 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
565 /*
566  * In kernels 2.6.36 and later, Open vSwitch can safely coexist with
567  * the Linux bridge module, but it does not make sense to load both bridge and
568  * brcompat_mod, so this prevents it.
569  */
570 BRIDGE_MUTUAL_EXCLUSION;
571 #endif