datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.
[sliver-openvswitch.git] / datapath / brcompat.c
1 /*
2  * Copyright (c) 2009, 2011 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11 #include <linux/kernel.h>
12 #include <asm/uaccess.h>
13 #include <linux/completion.h>
14 #include <linux/etherdevice.h>
15 #include <linux/if_bridge.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
19
20 #include "openvswitch/brcompat-netlink.h"
21 #include "brc_procfs.h"
22 #include "datapath.h"
23
24 static struct genl_family brc_genl_family;
25 static struct genl_multicast_group brc_mc_group;
26
27 /* Time to wait for ovs-vswitchd to respond to a datapath action, in
28  * jiffies. */
29 #define BRC_TIMEOUT (HZ * 5)
30
31 /* Mutex to serialize ovs-brcompatd callbacks.  (Some callbacks naturally hold
32  * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
33  * ourselves and we don't want to hold the latter over a potentially long
34  * period of time.) */
35 static DEFINE_MUTEX(brc_serial);
36
37 /* Userspace communication. */
38 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
39 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
40 static struct sk_buff *brc_reply;    /* Reply from userspace. */
41 static u32 brc_seq;                  /* Sequence number for current op. */
42
43 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
44 static int brc_send_simple_command(struct sk_buff *);
45
46 static struct sk_buff *brc_make_request(int op, const char *bridge,
47                                         const char *port)
48 {
49         struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
50         if (!skb)
51                 goto error;
52
53         genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
54         if (bridge)
55                 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
56         if (port)
57                 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
58         return skb;
59
60 nla_put_failure:
61         kfree_skb(skb);
62 error:
63         return NULL;
64 }
65
66 static int brc_send_simple_command(struct sk_buff *request)
67 {
68         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
69         struct sk_buff *reply;
70         int error;
71
72         reply = brc_send_command(request, attrs);
73         if (IS_ERR(reply))
74                 return PTR_ERR(reply);
75
76         error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
77         kfree_skb(reply);
78         return -error;
79 }
80
81 static int brc_add_del_bridge(char __user *uname, int add)
82 {
83         struct sk_buff *request;
84         char name[IFNAMSIZ];
85
86         if (!capable(CAP_NET_ADMIN))
87                 return -EPERM;
88
89         if (copy_from_user(name, uname, IFNAMSIZ))
90                 return -EFAULT;
91
92         name[IFNAMSIZ - 1] = 0;
93         request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
94                                    name, NULL);
95         if (!request)
96                 return -ENOMEM;
97
98         return brc_send_simple_command(request);
99 }
100
101 static int brc_get_indices(int op, const char *br_name,
102                            int __user *uindices, int n)
103 {
104         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
105         struct sk_buff *request, *reply;
106         int *indices;
107         int ret;
108         int len;
109
110         if (n < 0)
111                 return -EINVAL;
112         if (n >= 2048)
113                 return -ENOMEM;
114
115         request = brc_make_request(op, br_name, NULL);
116         if (!request)
117                 return -ENOMEM;
118
119         reply = brc_send_command(request, attrs);
120         ret = PTR_ERR(reply);
121         if (IS_ERR(reply))
122                 goto exit;
123
124         ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
125         if (ret < 0)
126                 goto exit_free_skb;
127
128         ret = -EINVAL;
129         if (!attrs[BRC_GENL_A_IFINDEXES])
130                 goto exit_free_skb;
131
132         len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
133         indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
134         if (len % sizeof(int))
135                 goto exit_free_skb;
136
137         n = min_t(int, n, len / sizeof(int));
138         ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
139
140 exit_free_skb:
141         kfree_skb(reply);
142 exit:
143         return ret;
144 }
145
146 /* Called with br_ioctl_mutex. */
147 static int brc_get_bridges(int __user *uindices, int n)
148 {
149         return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
150 }
151
152 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
153 static int old_deviceless(void __user *uarg)
154 {
155         unsigned long args[3];
156
157         if (copy_from_user(args, uarg, sizeof(args)))
158                 return -EFAULT;
159
160         switch (args[0]) {
161         case BRCTL_GET_BRIDGES:
162                 return brc_get_bridges((int __user *)args[1], args[2]);
163
164         case BRCTL_ADD_BRIDGE:
165                 return brc_add_del_bridge((void __user *)args[1], 1);
166         case BRCTL_DEL_BRIDGE:
167                 return brc_add_del_bridge((void __user *)args[1], 0);
168         }
169
170         return -EOPNOTSUPP;
171 }
172
173 /* Called with the br_ioctl_mutex. */
174 static int
175 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
176 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
177 #else
178 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
179 #endif
180 {
181         switch (cmd) {
182         case SIOCGIFBR:
183         case SIOCSIFBR:
184                 return old_deviceless(uarg);
185
186         case SIOCBRADDBR:
187                 return brc_add_del_bridge(uarg, 1);
188         case SIOCBRDELBR:
189                 return brc_add_del_bridge(uarg, 0);
190         }
191
192         return -EOPNOTSUPP;
193 }
194
195 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
196 {
197         struct sk_buff *request;
198         struct net_device *port;
199         int err;
200
201         if (!capable(CAP_NET_ADMIN))
202                 return -EPERM;
203
204         port = __dev_get_by_index(&init_net, port_ifindex);
205         if (!port)
206                 return -EINVAL;
207
208         /* Save name of dev and port because there's a race between the
209          * rtnl_unlock() and the brc_send_simple_command(). */
210         request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
211                                    dev->name, port->name);
212         if (!request)
213                 return -ENOMEM;
214
215         rtnl_unlock();
216         err = brc_send_simple_command(request);
217         rtnl_lock();
218
219         return err;
220 }
221
222 static int brc_get_bridge_info(struct net_device *dev,
223                                struct __bridge_info __user *ub)
224 {
225         struct __bridge_info b;
226
227         memset(&b, 0, sizeof(struct __bridge_info));
228
229         /* First two bytes are the priority, which we should skip.  This comes
230          * from struct bridge_id in br_private.h, which is unavailable to us.
231          */
232         memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
233         b.stp_enabled = 0;
234
235         if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
236                 return -EFAULT;
237
238         return 0;
239 }
240
241 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
242                              int num)
243 {
244         int retval;
245
246         rtnl_unlock();
247         retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
248                                  uindices, num);
249         rtnl_lock();
250
251         return retval;
252 }
253
254 /*
255  * Format up to a page worth of forwarding table entries
256  * userbuf -- where to copy result
257  * maxnum  -- maximum number of entries desired
258  *            (limited to a page for sanity)
259  * offset  -- number of records to skip
260  */
261 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
262                                unsigned long maxnum, unsigned long offset)
263 {
264         struct nlattr *attrs[BRC_GENL_A_MAX + 1];
265         struct sk_buff *request, *reply;
266         int retval;
267         int len;
268
269         /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
270         if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
271                 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
272
273         request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
274         if (!request)
275                 return -ENOMEM;
276         NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
277         NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
278
279         rtnl_unlock();
280         reply = brc_send_command(request, attrs);
281         retval = PTR_ERR(reply);
282         if (IS_ERR(reply))
283                 goto exit;
284
285         retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
286         if (retval < 0)
287                 goto exit_free_skb;
288
289         retval = -EINVAL;
290         if (!attrs[BRC_GENL_A_FDB_DATA])
291                 goto exit_free_skb;
292         len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
293         if (len % sizeof(struct __fdb_entry) ||
294             len / sizeof(struct __fdb_entry) > maxnum)
295                 goto exit_free_skb;
296
297         retval = len / sizeof(struct __fdb_entry);
298         if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
299                 retval = -EFAULT;
300
301 exit_free_skb:
302         kfree_skb(reply);
303 exit:
304         rtnl_lock();
305         return retval;
306
307 nla_put_failure:
308         kfree_skb(request);
309         return -ENOMEM;
310 }
311
312 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
313 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
314 {
315         unsigned long args[4];
316
317         if (copy_from_user(args, rq->ifr_data, sizeof(args)))
318                 return -EFAULT;
319
320         switch (args[0]) {
321         case BRCTL_ADD_IF:
322                 return brc_add_del_port(dev, args[1], 1);
323         case BRCTL_DEL_IF:
324                 return brc_add_del_port(dev, args[1], 0);
325
326         case BRCTL_GET_BRIDGE_INFO:
327                 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
328
329         case BRCTL_GET_PORT_LIST:
330                 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
331
332         case BRCTL_GET_FDB_ENTRIES:
333                 return brc_get_fdb_entries(dev, (void __user *)args[1],
334                                            args[2], args[3]);
335         }
336
337         return -EOPNOTSUPP;
338 }
339
340 /* Called with the rtnl_lock. */
341 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
342 {
343         int err;
344
345         switch (cmd) {
346                 case SIOCDEVPRIVATE:
347                         err = old_dev_ioctl(dev, rq, cmd);
348                         break;
349
350                 case SIOCBRADDIF:
351                         return brc_add_del_port(dev, rq->ifr_ifindex, 1);
352                 case SIOCBRDELIF:
353                         return brc_add_del_port(dev, rq->ifr_ifindex, 0);
354
355                 default:
356                         err = -EOPNOTSUPP;
357                         break;
358         }
359
360         return err;
361 }
362
363
364 static struct genl_family brc_genl_family = {
365         .id = GENL_ID_GENERATE,
366         .hdrsize = 0,
367         .name = BRC_GENL_FAMILY_NAME,
368         .version = 1,
369         .maxattr = BRC_GENL_A_MAX,
370 };
371
372 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
373 {
374         int err = -EINVAL;
375         struct sk_buff *ans_skb;
376         void *data;
377
378         ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
379         if (!ans_skb)
380                 return -ENOMEM;
381
382         data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
383                                  0, BRC_GENL_C_QUERY_MC);
384         if (data == NULL) {
385                 err = -ENOMEM;
386                 goto err;
387         }
388         NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
389
390         genlmsg_end(ans_skb, data);
391         return genlmsg_reply(ans_skb, info);
392
393 err:
394 nla_put_failure:
395         kfree_skb(ans_skb);
396         return err;
397 }
398
399 static struct genl_ops brc_genl_ops_query_dp = {
400         .cmd = BRC_GENL_C_QUERY_MC,
401         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
402         .policy = NULL,
403         .doit = brc_genl_query,
404         .dumpit = NULL
405 };
406
407 /* Attribute policy: what each attribute may contain.  */
408 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
409         [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
410
411 #ifdef HAVE_NLA_NUL_STRING
412         [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING,
413                                   .len = BRC_NAME_LEN_MAX },
414         [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING,
415                                   .len = BRC_NAME_LEN_MAX },
416         [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
417 #endif
418
419         [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
420 };
421
422 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
423 {
424         unsigned long int flags;
425         int err;
426
427         if (!info->attrs[BRC_GENL_A_ERR_CODE])
428                 return -EINVAL;
429
430         skb = skb_clone(skb, GFP_KERNEL);
431         if (!skb)
432                 return -ENOMEM;
433
434         spin_lock_irqsave(&brc_lock, flags);
435         if (brc_seq == info->snd_seq) {
436                 brc_seq++;
437
438                 kfree_skb(brc_reply);
439                 brc_reply = skb;
440
441                 complete(&brc_done);
442                 err = 0;
443         } else {
444                 kfree_skb(skb);
445                 err = -ESTALE;
446         }
447         spin_unlock_irqrestore(&brc_lock, flags);
448
449         return err;
450 }
451
452 static struct genl_ops brc_genl_ops_dp_result = {
453         .cmd = BRC_GENL_C_DP_RESULT,
454         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
455         .policy = brc_genl_policy,
456         .doit = brc_genl_dp_result,
457         .dumpit = NULL
458 };
459
460 static struct genl_ops brc_genl_ops_set_proc = {
461         .cmd = BRC_GENL_C_SET_PROC,
462         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
463         .policy = brc_genl_policy,
464         .doit = brc_genl_set_proc,
465         .dumpit = NULL
466 };
467
468 static struct sk_buff *brc_send_command(struct sk_buff *request,
469                                         struct nlattr **attrs)
470 {
471         unsigned long int flags;
472         struct sk_buff *reply;
473         int error;
474
475         mutex_lock(&brc_serial);
476
477         /* Increment sequence number first, so that we ignore any replies
478          * to stale requests. */
479         spin_lock_irqsave(&brc_lock, flags);
480         nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
481         INIT_COMPLETION(brc_done);
482         spin_unlock_irqrestore(&brc_lock, flags);
483
484         nlmsg_end(request, nlmsg_hdr(request));
485
486         /* Send message. */
487         error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
488         if (error < 0)
489                 goto error;
490
491         /* Wait for reply. */
492         error = -ETIMEDOUT;
493         if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
494                 pr_warn("timed out waiting for userspace\n");
495                 goto error;
496     }
497
498         /* Grab reply. */
499         spin_lock_irqsave(&brc_lock, flags);
500         reply = brc_reply;
501         brc_reply = NULL;
502         spin_unlock_irqrestore(&brc_lock, flags);
503
504         mutex_unlock(&brc_serial);
505
506         /* Re-parse message.  Can't fail, since it parsed correctly once
507          * already. */
508         error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
509                             attrs, BRC_GENL_A_MAX, brc_genl_policy);
510         WARN_ON(error);
511
512         return reply;
513
514 error:
515         mutex_unlock(&brc_serial);
516         return ERR_PTR(error);
517 }
518
519 static int __init brc_init(void)
520 {
521         int err;
522
523         printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
524
525         /* Set the bridge ioctl handler */
526         brioctl_set(brc_ioctl_deviceless_stub);
527
528         /* Set the openvswitch_mod device ioctl handler */
529         dp_ioctl_hook = brc_dev_ioctl;
530
531         /* Randomize the initial sequence number.  This is not a security
532          * feature; it only helps avoid crossed wires between userspace and
533          * the kernel when the module is unloaded and reloaded. */
534         brc_seq = net_random();
535
536         /* Register generic netlink family to communicate changes to
537          * userspace. */
538         err = genl_register_family(&brc_genl_family);
539         if (err)
540                 goto error;
541
542         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
543         if (err != 0)
544                 goto err_unregister;
545
546         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
547         if (err != 0)
548                 goto err_unregister;
549
550         err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
551         if (err != 0)
552                 goto err_unregister;
553
554         strcpy(brc_mc_group.name, "brcompat");
555         err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
556         if (err < 0)
557                 goto err_unregister;
558
559         return 0;
560
561 err_unregister:
562         genl_unregister_family(&brc_genl_family);
563 error:
564         pr_emerg("failed to install!\n");
565         return err;
566 }
567
568 static void brc_cleanup(void)
569 {
570         /* Unregister ioctl hooks */
571         dp_ioctl_hook = NULL;
572         brioctl_set(NULL);
573
574         genl_unregister_family(&brc_genl_family);
575         brc_procfs_exit();
576 }
577
578 module_init(brc_init);
579 module_exit(brc_cleanup);
580
581 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
582 MODULE_AUTHOR("Nicira Networks");
583 MODULE_LICENSE("GPL");