2 /* Copyright (c) 2009, 2011 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others. */
/* Prefix the format string of every pr_*() message in this file (see the
 * pr_warn() and pr_emerg() calls below) with the module name and ": ". */
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/kernel.h>
12 #include <asm/uaccess.h>
13 #include <linux/completion.h>
14 #include <linux/etherdevice.h>
15 #include <linux/if_bridge.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <net/genetlink.h>
20 #include "openvswitch/brcompat-netlink.h"
21 #include "brc_procfs.h"
/* Generic netlink family and multicast group used throughout this file.
 * The family is initialized further down; the group is named and registered
 * in brc_init(). */
24 static struct genl_family brc_genl_family;
25 static struct genl_multicast_group brc_mc_group;
27 /* Time to wait for ovs-vswitchd to respond to a datapath action, in jiffies. */
29 #define BRC_TIMEOUT (HZ * 5)
31 /* Mutex to serialize ovs-brcompatd callbacks. (Some callbacks naturally hold
32 * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
33 * ourselves and we don't want to hold the latter over a potentially long callback.) */
35 static DEFINE_MUTEX(brc_serial);
/* State shared between brc_send_command(), which bumps brc_seq and re-arms
 * brc_done under brc_lock before multicasting a request, and
 * brc_genl_dp_result(), which compares an incoming reply's sequence number
 * against brc_seq under the same lock. */
37 /* Userspace communication. */
38 static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */
39 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
40 static struct sk_buff *brc_reply; /* Reply from userspace. */
41 static u32 brc_seq; /* Sequence number for current op. */
/* Forward declarations for the request helpers.  brc_send_command() is
 * defined near the end of the file but is called by earlier functions. */
43 static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
44 static int brc_send_simple_command(struct sk_buff *);
/*
 * Builds a request for userspace: allocates a generic netlink message of
 * NLMSG_GOODSIZE, stamps it with command 'op' in brc_genl_family, and adds
 * 'bridge' as BRC_GENL_A_DP_NAME and 'port' as BRC_GENL_A_PORT_NAME.
 *
 * Callers in this file pass NULL for a name they do not have, so each
 * attribute is presumably added only when its string is non-NULL -- TODO
 * confirm.  The sequence number passed to genlmsg_put() is 0;
 * brc_send_command() assigns the real one before sending.
 */
46 static struct sk_buff *brc_make_request(int op, const char *bridge,
49 struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
53 genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
55 NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
57 NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
/*
 * Sends 'request' with brc_send_command() and reads the u32
 * BRC_GENL_A_ERR_CODE attribute from the reply.  When brc_send_command()
 * hands back an error pointer, its PTR_ERR() value is returned instead.
 * NOTE(review): the error code is presumably returned negated, as
 * brc_get_indices() does -- confirm.
 */
66 static int brc_send_simple_command(struct sk_buff *request)
68 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
69 struct sk_buff *reply;
72 reply = brc_send_command(request, attrs);
74 return PTR_ERR(reply);
76 error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
/*
 * Sends userspace a BRC_GENL_C_DP_ADD (add != 0) or BRC_GENL_C_DP_DEL
 * (add == 0) request for the bridge whose name is read from 'uname'.
 *
 * The caller is checked for CAP_NET_ADMIN.  IFNAMSIZ bytes are copied from
 * userspace and the last byte is forced to NUL so the name is always
 * terminated.  The return value is whatever brc_send_simple_command()
 * reports for the request.
 */
81 static int brc_add_del_bridge(char __user *uname, int add)
83 struct sk_buff *request;
86 if (!capable(CAP_NET_ADMIN))
89 if (copy_from_user(name, uname, IFNAMSIZ))
92 name[IFNAMSIZ - 1] = 0;
93 request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
98 return brc_send_simple_command(request);
/*
 * Sends command 'op' for bridge 'br_name' (which may be NULL) and copies the
 * interface indexes found in the reply's BRC_GENL_A_IFINDEXES attribute to
 * the user array 'uindices'.
 *
 * At most 'n' indexes are copied.  The attribute length is checked to be a
 * whole number of ints.  A reply error code is negated into the return
 * value; a failed copy to userspace yields -EFAULT; otherwise the number of
 * indexes copied is returned.
 */
101 static int brc_get_indices(int op, const char *br_name,
102 int __user *uindices, int n)
104 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
105 struct sk_buff *request, *reply;
115 request = brc_make_request(op, br_name, NULL);
119 reply = brc_send_command(request, attrs);
120 ret = PTR_ERR(reply);
124 ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
129 if (!attrs[BRC_GENL_A_IFINDEXES])
132 len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
133 indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
134 if (len % sizeof(int))
137 n = min_t(int, n, len / sizeof(int));
138 ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
/* Runs brc_get_indices() with BRC_GENL_C_GET_BRIDGES and no bridge name,
 * copying up to 'n' indexes to 'uindices'. */
146 /* Called with br_ioctl_mutex. */
147 static int brc_get_bridges(int __user *uindices, int n)
149 return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
/* Reads three unsigned longs from 'uarg' and dispatches the legacy request.
 * BRCTL_GET_BRIDGES treats args[1] as a user int array and args[2] as its
 * capacity; BRCTL_ADD_BRIDGE and BRCTL_DEL_BRIDGE treat args[1] as a user
 * pointer to the bridge name.  The request selector is presumably args[0]
 * -- TODO confirm. */
152 /* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */
153 static int old_deviceless(void __user *uarg)
155 unsigned long args[3];
157 if (copy_from_user(args, uarg, sizeof(args)))
161 case BRCTL_GET_BRIDGES:
162 return brc_get_bridges((int __user *)args[1], args[2]);
164 case BRCTL_ADD_BRIDGE:
165 return brc_add_del_bridge((void __user *)args[1], 1);
166 case BRCTL_DEL_BRIDGE:
167 return brc_add_del_bridge((void __user *)args[1], 0);
/* Deviceless bridge ioctl entry point, installed with brioctl_set() in
 * brc_init().  Kernels up to and including 2.6.23 use the signature without
 * 'struct net *'; the other signature takes it as the first argument.  The
 * visible paths forward to old_deviceless() or to brc_add_del_bridge() with
 * add set to 1 or 0. */
173 /* Called with the br_ioctl_mutex. */
175 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
176 brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
178 brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
184 return old_deviceless(uarg);
187 return brc_add_del_bridge(uarg, 1);
189 return brc_add_del_bridge(uarg, 0);
/*
 * Sends userspace a BRC_GENL_C_PORT_ADD (add != 0) or BRC_GENL_C_PORT_DEL
 * (add == 0) request naming 'dev' and the device whose index is
 * 'port_ifindex'.
 *
 * The caller is checked for CAP_NET_ADMIN.  The port device is looked up in
 * init_net with __dev_get_by_index(), and both device names are copied into
 * the request before it is sent with brc_send_simple_command().
 */
195 static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
197 struct sk_buff *request;
198 struct net_device *port;
201 if (!capable(CAP_NET_ADMIN))
204 port = __dev_get_by_index(&init_net, port_ifindex);
208 /* Save name of dev and port because there's a race between the
209 * rtnl_unlock() and the brc_send_simple_command(). */
210 request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
211 dev->name, port->name);
216 err = brc_send_simple_command(request);
/* Fills a zeroed struct __bridge_info for 'dev' and copies it to the user
 * buffer 'ub'.  The only field set in the visible code is bridge_id, which
 * receives the device's Ethernet address starting at byte offset 2. */
222 static int brc_get_bridge_info(struct net_device *dev,
223 struct __bridge_info __user *ub)
225 struct __bridge_info b;
227 memset(&b, 0, sizeof(struct __bridge_info));
229 /* First two bytes are the priority, which we should skip. This comes
230 * from struct bridge_id in br_private.h, which is unavailable to us.
232 memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
235 if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
/* Fetches indexes for bridge 'dev' by calling brc_get_indices() with
 * BRC_GENL_C_GET_PORTS and dev->name. */
241 static int brc_get_port_list(struct net_device *dev, int __user *uindices,
247 retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
255 /* Format up to a page worth of forwarding table entries
256 * userbuf -- where to copy result
257 * maxnum -- maximum number of entries desired
258 * (limited to a page for sanity)
259 * offset -- number of records to skip */
/*
 * Sends a BRC_GENL_C_FDB_QUERY request for 'dev', carrying 'maxnum' and
 * 'offset' as the u64 attributes BRC_GENL_A_FDB_COUNT and
 * BRC_GENL_A_FDB_SKIP, and copies the reply's BRC_GENL_A_FDB_DATA payload to
 * 'userbuf'.
 *
 * The reply length is checked to be a whole number of struct __fdb_entry and
 * no more than 'maxnum' entries.  A reply error code is negated into the
 * return value; on the success path the return value is the number of
 * entries in the reply.
 */
261 static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
262 unsigned long maxnum, unsigned long offset)
264 struct nlattr *attrs[BRC_GENL_A_MAX + 1];
265 struct sk_buff *request, *reply;
269 /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
270 if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
271 maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
273 request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
276 NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
277 NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
280 reply = brc_send_command(request, attrs);
281 retval = PTR_ERR(reply);
285 retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
290 if (!attrs[BRC_GENL_A_FDB_DATA])
292 len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
293 if (len % sizeof(struct __fdb_entry) ||
294 len / sizeof(struct __fdb_entry) > maxnum)
297 retval = len / sizeof(struct __fdb_entry);
298 if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
/* Reads four unsigned longs from rq->ifr_data and dispatches the legacy
 * request.  The two brc_add_del_port() paths use args[1] as the port
 * ifindex; BRCTL_GET_BRIDGE_INFO, BRCTL_GET_PORT_LIST and
 * BRCTL_GET_FDB_ENTRIES use args[1] as the user buffer, with
 * BRCTL_GET_PORT_LIST also passing args[2]. */
312 /* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock. */
313 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
315 unsigned long args[4];
317 if (copy_from_user(args, rq->ifr_data, sizeof(args)))
322 return brc_add_del_port(dev, args[1], 1);
324 return brc_add_del_port(dev, args[1], 0);
326 case BRCTL_GET_BRIDGE_INFO:
327 return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
329 case BRCTL_GET_PORT_LIST:
330 return brc_get_port_list(dev, (int __user *)args[1], args[2]);
332 case BRCTL_GET_FDB_ENTRIES:
333 return brc_get_fdb_entries(dev, (void __user *)args[1],
/* Per-device ioctl handler, installed as dp_ioctl_hook in brc_init().  The
 * visible paths either hand the request to old_dev_ioctl() or call
 * brc_add_del_port() with rq->ifr_ifindex and add set to 1 or 0. */
340 /* Called with the rtnl_lock. */
341 static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
347 err = old_dev_ioctl(dev, rq, cmd);
351 return brc_add_del_port(dev, rq->ifr_ifindex, 1);
353 return brc_add_del_port(dev, rq->ifr_ifindex, 0);
/* Generic netlink family used to exchange messages with userspace.  The id
 * is GENL_ID_GENERATE rather than a fixed number, and the highest attribute
 * type is BRC_GENL_A_MAX. */
364 static struct genl_family brc_genl_family = {
365 .id = GENL_ID_GENERATE,
367 .name = BRC_GENL_FAMILY_NAME,
369 .maxattr = BRC_GENL_A_MAX,
/* Handler for BRC_GENL_C_QUERY_MC: builds a reply carrying brc_mc_group.id
 * in a BRC_GENL_A_MC_GROUP attribute and returns it to the sender with
 * genlmsg_reply(). */
372 static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
375 struct sk_buff *ans_skb;
378 ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
382 data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
383 0, BRC_GENL_C_QUERY_MC);
388 NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
390 genlmsg_end(ans_skb, data);
391 return genlmsg_reply(ans_skb, info);
/* Routes BRC_GENL_C_QUERY_MC to brc_genl_query(). */
399 static struct genl_ops brc_genl_ops_query_dp = {
400 .cmd = BRC_GENL_C_QUERY_MC,
401 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
403 .doit = brc_genl_query,
/* Used as the policy for the BRC_GENL_C_DP_RESULT and BRC_GENL_C_SET_PROC
 * operations, and by brc_send_command() when it re-parses a reply. */
407 /* Attribute policy: what each attribute may contain. */
408 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
409 [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
411 #ifdef HAVE_NLA_NUL_STRING
412 [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING,
413 .len = BRC_NAME_LEN_MAX },
414 [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING,
415 .len = BRC_NAME_LEN_MAX },
416 [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
419 [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
/* Handler for BRC_GENL_C_DP_RESULT, the reply sent by userspace.  The
 * message is checked for a BRC_GENL_A_ERR_CODE attribute and the skb is
 * cloned.  Under brc_lock the reply's sequence number is compared with
 * brc_seq; on a match any previously stored brc_reply is freed.  Presumably
 * the clone then becomes brc_reply and brc_done is completed -- TODO
 * confirm. */
422 static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
424 unsigned long int flags;
427 if (!info->attrs[BRC_GENL_A_ERR_CODE])
430 skb = skb_clone(skb, GFP_KERNEL);
434 spin_lock_irqsave(&brc_lock, flags);
435 if (brc_seq == info->snd_seq) {
438 kfree_skb(brc_reply);
447 spin_unlock_irqrestore(&brc_lock, flags);
/* Routes BRC_GENL_C_DP_RESULT to brc_genl_dp_result(), validated against
 * brc_genl_policy. */
452 static struct genl_ops brc_genl_ops_dp_result = {
453 .cmd = BRC_GENL_C_DP_RESULT,
454 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
455 .policy = brc_genl_policy,
456 .doit = brc_genl_dp_result,
/* Routes BRC_GENL_C_SET_PROC to brc_genl_set_proc(), validated against
 * brc_genl_policy.  The handler is not defined in this file. */
460 static struct genl_ops brc_genl_ops_set_proc = {
461 .cmd = BRC_GENL_C_SET_PROC,
462 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
463 .policy = brc_genl_policy,
464 .doit = brc_genl_set_proc,
/*
 * Multicasts 'request' to brc_mc_group and waits up to BRC_TIMEOUT for
 * userspace to signal brc_done.
 *
 * brc_serial is taken at the start and released on both the normal and the
 * error path.  Under brc_lock the request is given the next value of
 * brc_seq and brc_done is re-armed.  A timeout is logged with pr_warn().
 * After the wait the reply is re-parsed into 'attrs' (BRC_GENL_A_MAX
 * entries, brc_genl_policy); the reply skb is presumably what is returned
 * on success -- TODO confirm.  The error path returns ERR_PTR(error).
 */
468 static struct sk_buff *brc_send_command(struct sk_buff *request,
469 struct nlattr **attrs)
471 unsigned long int flags;
472 struct sk_buff *reply;
475 mutex_lock(&brc_serial);
477 /* Increment sequence number first, so that we ignore any replies
478 * to stale requests. */
479 spin_lock_irqsave(&brc_lock, flags);
480 nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
481 INIT_COMPLETION(brc_done);
482 spin_unlock_irqrestore(&brc_lock, flags);
484 nlmsg_end(request, nlmsg_hdr(request));
487 error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
491 /* Wait for reply. */
493 if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
494 pr_warn("timed out waiting for userspace\n");
499 spin_lock_irqsave(&brc_lock, flags);
502 spin_unlock_irqrestore(&brc_lock, flags);
504 mutex_unlock(&brc_serial);
506 /* Re-parse message. Can't fail, since it parsed correctly once
508 error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
509 attrs, BRC_GENL_A_MAX, brc_genl_policy);
515 mutex_unlock(&brc_serial);
516 return ERR_PTR(error);
/*
 * Module entry point.  Prints a banner, installs the deviceless bridge
 * ioctl handler and the per-device dp_ioctl_hook, seeds brc_seq with a
 * random value, then registers the generic netlink family, its three
 * operations (query, dp_result, set_proc) and the "brcompat" multicast
 * group.  The visible failure path unregisters the family and logs with
 * pr_emerg().
 */
519 static int __init brc_init(void)
523 printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
525 /* Set the bridge ioctl handler */
526 brioctl_set(brc_ioctl_deviceless_stub);
528 /* Set the openvswitch_mod device ioctl handler */
529 dp_ioctl_hook = brc_dev_ioctl;
531 /* Randomize the initial sequence number. This is not a security
532 * feature; it only helps avoid crossed wires between userspace and
533 * the kernel when the module is unloaded and reloaded. */
534 brc_seq = net_random();
536 /* Register generic netlink family to communicate changes to
538 err = genl_register_family(&brc_genl_family);
542 err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
546 err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
550 err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
554 strcpy(brc_mc_group.name, "brcompat");
555 err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
562 genl_unregister_family(&brc_genl_family);
564 pr_emerg("failed to install!\n");
/* Module exit handler.  The visible code clears dp_ioctl_hook and
 * unregisters the generic netlink family. */
568 static void brc_cleanup(void)
570 /* Unregister ioctl hooks */
571 dp_ioctl_hook = NULL;
574 genl_unregister_family(&brc_genl_family);
/* Register brc_init()/brc_cleanup() as the module's load and unload
 * handlers and declare the module metadata. */
578 module_init(brc_init);
579 module_exit(brc_cleanup);
581 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
582 MODULE_AUTHOR("Nicira Networks");
583 MODULE_LICENSE("GPL");