X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fbrcompat.c;h=bd223c7dfe070f920f31991bc532913a12e23fba;hb=2d5285e1dfab3d67dd20578b65afc2c4ef6065e9;hp=c4baac38df7a4b0b757304ec224c7a6b7a760c11;hpb=806e39cfdf17ae61221eefb6e292f99c8abc2efc;p=sliver-openvswitch.git diff --git a/datapath/brcompat.c b/datapath/brcompat.c index c4baac38d..bd223c7df 100644 --- a/datapath/brcompat.c +++ b/datapath/brcompat.c @@ -1,20 +1,25 @@ +/* + * Copyright (c) 2009, 2011 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include #include -#include +#include #include -#include #include #include -#include #include #include #include -#include "compat.h" #include "openvswitch/brcompat-netlink.h" -#include "brc_procfs.h" -#include "brc_sysfs.h" #include "datapath.h" -#include "dp_dev.h" static struct genl_family brc_genl_family; static struct genl_multicast_group brc_mc_group; @@ -32,76 +37,121 @@ static DEFINE_MUTEX(brc_serial); /* Userspace communication. */ static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */ static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */ -static int brc_err; /* Error code from userspace. */ +static struct sk_buff *brc_reply; /* Reply from userspace. */ static u32 brc_seq; /* Sequence number for current op. */ -static int brc_send_command(const char *bridge, const char *port, int op); +static struct sk_buff *brc_send_command(struct sk_buff *, + struct nlattr **attrs); +static int brc_send_simple_command(struct sk_buff *); -static int -get_dp_ifindices(int *indices, int num) +static struct sk_buff *brc_make_request(int op, const char *bridge, + const char *port) { - int i, index = 0; - - rcu_read_lock(); - for (i=0; i < ODP_MAX && index < num; i++) { - struct datapath *dp = get_dp(i); - if (!dp) - continue; - indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex; - } - rcu_read_unlock(); + struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + goto error; + + genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op); + if (bridge) + NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge); + if (port) + NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port); + return skb; - return index; +nla_put_failure: + kfree_skb(skb); +error: + return NULL; } -static void -get_port_ifindices(struct datapath *dp, int *ifindices, int num) +static int brc_send_simple_command(struct sk_buff *request) { - struct net_bridge_port *p; + struct nlattr *attrs[BRC_GENL_A_MAX + 1]; + struct sk_buff *reply; + int error; - rcu_read_lock(); - list_for_each_entry_rcu (p, &dp->port_list, node) { - if (p->port_no < num) - ifindices[p->port_no] = p->dev->ifindex; - } - rcu_read_unlock(); + reply = brc_send_command(request, attrs); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]); + kfree_skb(reply); + return -error; } static int brc_add_del_bridge(char __user *uname, int add) { + struct sk_buff *request; char name[IFNAMSIZ]; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(name, uname, IFNAMSIZ)) return -EFAULT; name[IFNAMSIZ - 1] = 0; - return brc_send_command(name, NULL, - add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL); + request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL, + name, NULL); + if (!request) + return -ENOMEM; + + return brc_send_simple_command(request); } -static int brc_get_bridges(int __user *uindices, int n) +static int brc_get_indices(int op, const char *br_name, + int __user *uindices, int n) { + struct nlattr *attrs[BRC_GENL_A_MAX + 1]; + struct sk_buff *request, *reply; int *indices; int ret; + int len; + if (n < 0) + return -EINVAL; if (n >= 2048) return -ENOMEM; - indices = kcalloc(n, sizeof(int), GFP_KERNEL); - if (indices == NULL) + request = brc_make_request(op, br_name, NULL); + if (!request) return -ENOMEM; - n = get_dp_ifindices(indices, n); + reply = brc_send_command(request, attrs); + ret = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit; + + ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]); + if (ret < 0) + goto exit_free_skb; + + ret = -EINVAL; + if (!attrs[BRC_GENL_A_IFINDEXES]) + goto exit_free_skb; + len = nla_len(attrs[BRC_GENL_A_IFINDEXES]); + indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]); + if (len % sizeof(int)) + goto exit_free_skb; + + n = min_t(int, n, len / sizeof(int)); ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n; - kfree(indices); +exit_free_skb: + kfree_skb(reply); +exit: return ret; } +/* Called with br_ioctl_mutex. */ +static int brc_get_bridges(int __user *uindices, int n) +{ + return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n); +} + /* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */ -static int -old_deviceless(void __user *uarg) +static int old_deviceless(void __user *uarg) { unsigned long args[3]; @@ -143,42 +193,44 @@ brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) return -EOPNOTSUPP; } -static int -brc_add_del_port(struct net_device *dev, int port_ifindex, int add) +static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add) { + struct sk_buff *request; struct net_device *port; - char dev_name[IFNAMSIZ], port_name[IFNAMSIZ]; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + port = __dev_get_by_index(&init_net, port_ifindex); if (!port) return -EINVAL; /* Save name of dev and port because there's a race between the - * rtnl_unlock() and the brc_send_command(). */ - strcpy(dev_name, dev->name); - strcpy(port_name, port->name); + * rtnl_unlock() and the brc_send_simple_command(). */ + request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL, + dev->name, port->name); + if (!request) + return -ENOMEM; rtnl_unlock(); - err = brc_send_command(dev_name, port_name, - add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL); + err = brc_send_simple_command(request); rtnl_lock(); return err; } -static int -brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub) +static int brc_get_bridge_info(struct net_device *dev, + struct __bridge_info __user *ub) { struct __bridge_info b; - u64 id = 0; - int i; memset(&b, 0, sizeof(struct __bridge_info)); - for (i=0; idev_addr[i] << (8*(ETH_ALEN-1 - i)); - b.bridge_id = cpu_to_be64(id); + /* First two bytes are the priority, which we should skip. This comes + * from struct bridge_id in br_private.h, which is unavailable to us. + */ + memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN); b.stp_enabled = 0; if (copy_to_user(ub, &b, sizeof(struct __bridge_info))) @@ -187,34 +239,79 @@ brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub) return 0; } -static int -brc_get_port_list(struct net_device *dev, int __user *uindices, int num) +static int brc_get_port_list(struct net_device *dev, int __user *uindices, + int num) { - struct dp_dev *dp_dev = netdev_priv(dev); - struct datapath *dp = dp_dev->dp; - int *indices; + int retval; - if (num < 0) - return -EINVAL; - if (num == 0) - num = 256; - if (num > DP_MAX_PORTS) - num = DP_MAX_PORTS; + rtnl_unlock(); + retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name, + uindices, num); + rtnl_lock(); + + return retval; +} + +/* + * Format up to a page worth of forwarding table entries + * userbuf -- where to copy result + * maxnum -- maximum number of entries desired + * (limited to a page for sanity) + * offset -- number of records to skip + */ +static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf, + unsigned long maxnum, unsigned long offset) +{ + struct nlattr *attrs[BRC_GENL_A_MAX + 1]; + struct sk_buff *request, *reply; + int retval; + int len; - indices = kcalloc(num, sizeof(int), GFP_KERNEL); - if (indices == NULL) + /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */ + if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry)) + maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); + + request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL); + if (!request) return -ENOMEM; + NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum); + NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset); - get_port_ifindices(dp, indices, num); - if (copy_to_user(uindices, indices, num * sizeof(int))) - num = -EFAULT; - kfree(indices); - return num; + rtnl_unlock(); + reply = brc_send_command(request, attrs); + retval = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit; + + retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]); + if (retval < 0) + goto exit_free_skb; + + retval = -EINVAL; + if (!attrs[BRC_GENL_A_FDB_DATA]) + goto exit_free_skb; + len = nla_len(attrs[BRC_GENL_A_FDB_DATA]); + if (len % sizeof(struct __fdb_entry) || + len / sizeof(struct __fdb_entry) > maxnum) + goto exit_free_skb; + + retval = len / sizeof(struct __fdb_entry); + if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len)) + retval = -EFAULT; + +exit_free_skb: + kfree_skb(reply); +exit: + rtnl_lock(); + return retval; + +nla_put_failure: + kfree_skb(request); + return -ENOMEM; } /* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock. */ -static int -old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { unsigned long args[4]; @@ -232,30 +329,33 @@ old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_GET_PORT_LIST: return brc_get_port_list(dev, (int __user *)args[1], args[2]); + + case BRCTL_GET_FDB_ENTRIES: + return brc_get_fdb_entries(dev, (void __user *)args[1], + args[2], args[3]); } return -EOPNOTSUPP; } /* Called with the rtnl_lock. */ -static int -brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { int err; switch (cmd) { - case SIOCDEVPRIVATE: - err = old_dev_ioctl(dev, rq, cmd); - break; - - case SIOCBRADDIF: - return brc_add_del_port(dev, rq->ifr_ifindex, 1); - case SIOCBRDELIF: - return brc_add_del_port(dev, rq->ifr_ifindex, 0); - - default: - err = -EOPNOTSUPP; - break; + case SIOCDEVPRIVATE: + err = old_dev_ioctl(dev, rq, cmd); + break; + + case SIOCBRADDIF: + return brc_add_del_port(dev, rq->ifr_ifindex, 1); + case SIOCBRDELIF: + return brc_add_del_port(dev, rq->ifr_ifindex, 0); + + default: + err = -EOPNOTSUPP; + break; } return err; @@ -277,7 +377,7 @@ static int brc_genl_query(struct sk_buff *skb, struct genl_info *info) void *data; ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!ans_skb) + if (!ans_skb) return -ENOMEM; data = genlmsg_put_reply(ans_skb, info, &brc_genl_family, @@ -297,24 +397,13 @@ nla_put_failure: return err; } -static struct genl_ops brc_genl_ops_query_dp = { - .cmd = BRC_GENL_C_QUERY_MC, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */ - .policy = NULL, - .doit = brc_genl_query, - .dumpit = NULL -}; - /* Attribute policy: what each attribute may contain. */ static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = { [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 }, - [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING }, - [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING }, - [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING }, + [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC }, }; -static int -brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info) +static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info) { unsigned long int flags; int err; @@ -322,12 +411,21 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[BRC_GENL_A_ERR_CODE]) return -EINVAL; + skb = skb_clone(skb, GFP_KERNEL); + if (!skb) + return -ENOMEM; + spin_lock_irqsave(&brc_lock, flags); if (brc_seq == info->snd_seq) { - brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]); + brc_seq++; + + kfree_skb(brc_reply); + brc_reply = skb; + complete(&brc_done); err = 0; } else { + kfree_skb(skb); err = -ESTALE; } spin_unlock_irqrestore(&brc_lock, flags); @@ -335,27 +433,24 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info) return err; } -static struct genl_ops brc_genl_ops_dp_result = { - .cmd = BRC_GENL_C_DP_RESULT, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */ - .policy = brc_genl_policy, - .doit = brc_genl_dp_result, - .dumpit = NULL -}; - -static struct genl_ops brc_genl_ops_set_proc = { - .cmd = BRC_GENL_C_SET_PROC, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */ - .policy = brc_genl_policy, - .doit = brc_genl_set_proc, - .dumpit = NULL +static struct genl_ops brc_genl_ops[] = { + { .cmd = BRC_GENL_C_QUERY_MC, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */ + .policy = NULL, + .doit = brc_genl_query, + }, + { .cmd = BRC_GENL_C_DP_RESULT, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */ + .policy = brc_genl_policy, + .doit = brc_genl_dp_result, + }, }; -static int brc_send_command(const char *bridge, const char *port, int op) +static struct sk_buff *brc_send_command(struct sk_buff *request, + struct nlattr **attrs) { unsigned long int flags; - struct sk_buff *skb; - void *data; + struct sk_buff *reply; int error; mutex_lock(&brc_serial); @@ -363,77 +458,50 @@ static int brc_send_command(const char *bridge, const char *port, int op) /* Increment sequence number first, so that we ignore any replies * to stale requests. */ spin_lock_irqsave(&brc_lock, flags); - brc_seq++; + nlmsg_hdr(request)->nlmsg_seq = ++brc_seq; INIT_COMPLETION(brc_done); spin_unlock_irqrestore(&brc_lock, flags); - /* Compose message. */ - skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - error = -ENOMEM; - if (skb == NULL) - goto exit_unlock; - data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op); - - NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge); - if (port) - NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port); - - genlmsg_end(skb, data); + nlmsg_end(request, nlmsg_hdr(request)); /* Send message. */ - error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL); + error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL); if (error < 0) - goto exit_unlock; + goto error; /* Wait for reply. */ error = -ETIMEDOUT; - if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) - goto exit_unlock; + if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) { + pr_warn("timed out waiting for userspace\n"); + goto error; + } - error = -brc_err; - goto exit_unlock; + /* Grab reply. */ + spin_lock_irqsave(&brc_lock, flags); + reply = brc_reply; + brc_reply = NULL; + spin_unlock_irqrestore(&brc_lock, flags); -nla_put_failure: - kfree_skb(skb); -exit_unlock: mutex_unlock(&brc_serial); - return error; -} -int brc_add_dp(struct datapath *dp) -{ - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - brc_sysfs_add_dp(dp); + /* Re-parse message. Can't fail, since it parsed correctly once + * already. */ + error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN, + attrs, BRC_GENL_A_MAX, brc_genl_policy); + WARN_ON(error); - return 0; -} + return reply; -int brc_del_dp(struct datapath *dp) -{ - brc_sysfs_del_dp(dp); - module_put(THIS_MODULE); - - return 0; +error: + mutex_unlock(&brc_serial); + return ERR_PTR(error); } -static int -__init brc_init(void) +static int __init brc_init(void) { - int i; int err; - printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n"); - - rcu_read_lock(); - for (i=0; i