Prepare Open vSwitch 1.1.2 release.
[sliver-openvswitch.git] / datapath / brcompat.c
index 47805bc..f9d0083 100644 (file)
@@ -1,28 +1,24 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2011 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
  * kernel, by Linus Torvalds and others.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <asm/uaccess.h>
 #include <linux/completion.h>
-#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
-#include <linux/rculist.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <net/genetlink.h>
 
-#include "compat.h"
 #include "openvswitch/brcompat-netlink.h"
-#include "brc_procfs.h"
-#include "brc_sysfs.h"
 #include "datapath.h"
-#include "dp_dev.h"
 
 static struct genl_family brc_genl_family;
 static struct genl_multicast_group brc_mc_group;
@@ -40,76 +36,120 @@ static DEFINE_MUTEX(brc_serial);
 /* Userspace communication. */
 static DEFINE_SPINLOCK(brc_lock);    /* Ensure atomic access to these vars. */
 static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
-static int brc_err;                 /* Error code from userspace. */
+static struct sk_buff *brc_reply;    /* Reply from userspace. */
 static u32 brc_seq;                 /* Sequence number for current op. */
 
-static int brc_send_command(const char *bridge, const char *port, int op);
+static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
+static int brc_send_simple_command(struct sk_buff *);
 
-static int
-get_dp_ifindices(int *indices, int num)
+static struct sk_buff *brc_make_request(int op, const char *bridge,
+                                       const char *port)
 {
-       int i, index = 0;
-
-       rcu_read_lock();
-       for (i=0; i < ODP_MAX && index < num; i++) {
-               struct datapath *dp = get_dp(i);
-               if (!dp)
-                       continue;
-               indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
-       }
-       rcu_read_unlock();
+       struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb)
+               goto error;
+
+       genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
+       if (bridge)
+               NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+       if (port)
+               NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
+       return skb;
 
-       return index;
+nla_put_failure:
+       kfree_skb(skb);
+error:
+       return NULL;
 }
 
-static void
-get_port_ifindices(struct datapath *dp, int *ifindices, int num)
+static int brc_send_simple_command(struct sk_buff *request)
 {
-       struct net_bridge_port *p;
+       struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+       struct sk_buff *reply;
+       int error;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu (p, &dp->port_list, node) {
-               if (p->port_no < num)
-                       ifindices[p->port_no] = p->dev->ifindex;
-       }
-       rcu_read_unlock();
+       reply = brc_send_command(request, attrs);
+       if (IS_ERR(reply))
+               return PTR_ERR(reply);
+
+       error = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+       kfree_skb(reply);
+       return -error;
 }
 
 static int brc_add_del_bridge(char __user *uname, int add)
 {
+       struct sk_buff *request;
        char name[IFNAMSIZ];
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (copy_from_user(name, uname, IFNAMSIZ))
                return -EFAULT;
 
        name[IFNAMSIZ - 1] = 0;
-       return brc_send_command(name, NULL,
-                               add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL);
+       request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
+                                  name, NULL);
+       if (!request)
+               return -ENOMEM;
+
+       return brc_send_simple_command(request);
 }
 
-static int brc_get_bridges(int __user *uindices, int n)
+static int brc_get_indices(int op, const char *br_name,
+                          int __user *uindices, int n)
 {
+       struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+       struct sk_buff *request, *reply;
        int *indices;
        int ret;
+       int len;
 
+       if (n < 0)
+               return -EINVAL;
        if (n >= 2048)
                return -ENOMEM;
 
-       indices = kcalloc(n, sizeof(int), GFP_KERNEL);
-       if (indices == NULL)
+       request = brc_make_request(op, br_name, NULL);
+       if (!request)
                return -ENOMEM;
 
-       n = get_dp_ifindices(indices, n);
+       reply = brc_send_command(request, attrs);
+       ret = PTR_ERR(reply);
+       if (IS_ERR(reply))
+               goto exit;
 
+       ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+       if (ret < 0)
+               goto exit_free_skb;
+
+       ret = -EINVAL;
+       if (!attrs[BRC_GENL_A_IFINDEXES])
+               goto exit_free_skb;
+
+       len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
+       indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
+       if (len % sizeof(int))
+               goto exit_free_skb;
+
+       n = min_t(int, n, len / sizeof(int));
        ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
 
-       kfree(indices);
+exit_free_skb:
+       kfree_skb(reply);
+exit:
        return ret;
 }
 
+/* Called with br_ioctl_mutex. */
+static int brc_get_bridges(int __user *uindices, int n)
+{
+       return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
+}
+
 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
-static int
-old_deviceless(void __user *uarg)
+static int old_deviceless(void __user *uarg)
 {
        unsigned long args[3];
 
@@ -151,42 +191,44 @@ brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
        return -EOPNOTSUPP;
 }
 
-static int
-brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
+static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
 {
+       struct sk_buff *request;
        struct net_device *port;
-       char dev_name[IFNAMSIZ], port_name[IFNAMSIZ];
        int err;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        port = __dev_get_by_index(&init_net, port_ifindex);
        if (!port)
                return -EINVAL;
 
        /* Save name of dev and port because there's a race between the
-        * rtnl_unlock() and the brc_send_command(). */
-       strcpy(dev_name, dev->name);
-       strcpy(port_name, port->name);
+        * rtnl_unlock() and the brc_send_simple_command(). */
+       request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
+                                  dev->name, port->name);
+       if (!request)
+               return -ENOMEM;
 
        rtnl_unlock();
-       err = brc_send_command(dev_name, port_name,
-                              add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL);
+       err = brc_send_simple_command(request);
        rtnl_lock();
 
        return err;
 }
 
-static int
-brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
+static int brc_get_bridge_info(struct net_device *dev,
+                              struct __bridge_info __user *ub)
 {
        struct __bridge_info b;
-       u64 id = 0;
-       int i;
 
        memset(&b, 0, sizeof(struct __bridge_info));
 
-       for (i=0; i<ETH_ALEN; i++)
-               id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
-       b.bridge_id = cpu_to_be64(id);
+       /* First two bytes are the priority, which we should skip.  This comes
+        * from struct bridge_id in br_private.h, which is unavailable to us.
+        */
+       memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
        b.stp_enabled = 0;
 
        if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
@@ -195,34 +237,79 @@ brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
        return 0;
 }
 
-static int
-brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
+static int brc_get_port_list(struct net_device *dev, int __user *uindices,
+                            int num)
 {
-       struct dp_dev *dp_dev = netdev_priv(dev);
-       struct datapath *dp = dp_dev->dp;
-       int *indices;
+       int retval;
 
-       if (num < 0)
-               return -EINVAL;
-       if (num == 0)
-               num = 256;
-       if (num > DP_MAX_PORTS)
-               num = DP_MAX_PORTS;
+       rtnl_unlock();
+       retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
+                                uindices, num);
+       rtnl_lock();
 
-       indices = kcalloc(num, sizeof(int), GFP_KERNEL);
-       if (indices == NULL)
+       return retval;
+}
+
+/*
+ * Format up to a page worth of forwarding table entries
+ * userbuf -- where to copy result
+ * maxnum  -- maximum number of entries desired
+ *            (limited to a page for sanity)
+ * offset  -- number of records to skip
+ */
+static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
+                              unsigned long maxnum, unsigned long offset)
+{
+       struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+       struct sk_buff *request, *reply;
+       int retval;
+       int len;
+
+       /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
+       if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
+               maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
+
+       request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
+       if (!request)
                return -ENOMEM;
+       NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
+       NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
 
-       get_port_ifindices(dp, indices, num);
-       if (copy_to_user(uindices, indices, num * sizeof(int)))
-               num = -EFAULT;
-       kfree(indices);
-       return num;
+       rtnl_unlock();
+       reply = brc_send_command(request, attrs);
+       retval = PTR_ERR(reply);
+       if (IS_ERR(reply))
+               goto exit;
+
+       retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+       if (retval < 0)
+               goto exit_free_skb;
+
+       retval = -EINVAL;
+       if (!attrs[BRC_GENL_A_FDB_DATA])
+               goto exit_free_skb;
+       len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
+       if (len % sizeof(struct __fdb_entry) ||
+           len / sizeof(struct __fdb_entry) > maxnum)
+               goto exit_free_skb;
+
+       retval = len / sizeof(struct __fdb_entry);
+       if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
+               retval = -EFAULT;
+
+exit_free_skb:
+       kfree_skb(reply);
+exit:
+       rtnl_lock();
+       return retval;
+
+nla_put_failure:
+       kfree_skb(request);
+       return -ENOMEM;
 }
 
 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
-static int
-old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        unsigned long args[4];
 
@@ -240,14 +327,17 @@ old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
        case BRCTL_GET_PORT_LIST:
                return brc_get_port_list(dev, (int __user *)args[1], args[2]);
+
+       case BRCTL_GET_FDB_ENTRIES:
+               return brc_get_fdb_entries(dev, (void __user *)args[1],
+                                          args[2], args[3]);
        }
 
        return -EOPNOTSUPP;
 }
 
 /* Called with the rtnl_lock. */
-static int
-brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        int err;
 
@@ -285,7 +375,7 @@ static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
        void *data;
 
        ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!ans_skb) 
+       if (!ans_skb)
                return -ENOMEM;
 
        data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
@@ -305,24 +395,13 @@ nla_put_failure:
        return err;
 }
 
-static struct genl_ops brc_genl_ops_query_dp = {
-       .cmd = BRC_GENL_C_QUERY_MC,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = NULL,
-       .doit = brc_genl_query,
-       .dumpit = NULL
-};
-
 /* Attribute policy: what each attribute may contain.  */
 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
        [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
-       [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
-       [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
-       [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
+       [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
 };
 
-static int
-brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
+static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
 {
        unsigned long int flags;
        int err;
@@ -330,12 +409,21 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[BRC_GENL_A_ERR_CODE])
                return -EINVAL;
 
+       skb = skb_clone(skb, GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
        spin_lock_irqsave(&brc_lock, flags);
        if (brc_seq == info->snd_seq) {
-               brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]);
+               brc_seq++;
+
+               kfree_skb(brc_reply);
+               brc_reply = skb;
+
                complete(&brc_done);
                err = 0;
        } else {
+               kfree_skb(skb);
                err = -ESTALE;
        }
        spin_unlock_irqrestore(&brc_lock, flags);
@@ -343,27 +431,24 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
        return err;
 }
 
-static struct genl_ops brc_genl_ops_dp_result = {
-       .cmd = BRC_GENL_C_DP_RESULT,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = brc_genl_policy,
-       .doit = brc_genl_dp_result,
-       .dumpit = NULL
-};
-
-static struct genl_ops brc_genl_ops_set_proc = {
-       .cmd = BRC_GENL_C_SET_PROC,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = brc_genl_policy,
-       .doit = brc_genl_set_proc,
-       .dumpit = NULL
+static struct genl_ops brc_genl_ops[] = {
+       { .cmd = BRC_GENL_C_QUERY_MC,
+         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+         .policy = NULL,
+         .doit = brc_genl_query,
+       },
+       { .cmd = BRC_GENL_C_DP_RESULT,
+         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+         .policy = brc_genl_policy,
+         .doit = brc_genl_dp_result,
+       },
 };
 
-static int brc_send_command(const char *bridge, const char *port, int op)
+static struct sk_buff *brc_send_command(struct sk_buff *request,
+                                       struct nlattr **attrs)
 {
        unsigned long int flags;
-       struct sk_buff *skb;
-       void *data;
+       struct sk_buff *reply;
        int error;
 
        mutex_lock(&brc_serial);
@@ -371,98 +456,57 @@ static int brc_send_command(const char *bridge, const char *port, int op)
        /* Increment sequence number first, so that we ignore any replies
         * to stale requests. */
        spin_lock_irqsave(&brc_lock, flags);
-       brc_seq++;
+       nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
        INIT_COMPLETION(brc_done);
        spin_unlock_irqrestore(&brc_lock, flags);
 
-       /* Compose message. */
-       skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
-       error = -ENOMEM;
-       if (skb == NULL)
-               goto exit_unlock;
-       data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op);
-
-       NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
-       if (port)
-               NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
-
-       genlmsg_end(skb, data);
+       nlmsg_end(request, nlmsg_hdr(request));
 
        /* Send message. */
-       error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL);
+       error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
        if (error < 0)
-               goto exit_unlock;
+               goto error;
 
        /* Wait for reply. */
        error = -ETIMEDOUT;
-       if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
-               goto exit_unlock;
+       if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
+               pr_warn("timed out waiting for userspace\n");
+               goto error;
+    }
 
-       error = -brc_err;
-       goto exit_unlock;
+       /* Grab reply. */
+       spin_lock_irqsave(&brc_lock, flags);
+       reply = brc_reply;
+       brc_reply = NULL;
+       spin_unlock_irqrestore(&brc_lock, flags);
 
-nla_put_failure:
-       kfree_skb(skb);
-exit_unlock:
        mutex_unlock(&brc_serial);
-       return error;
-}
-
-int brc_add_dp(struct datapath *dp)
-{
-       if (!try_module_get(THIS_MODULE))
-               return -ENODEV;
-#ifdef SUPPORT_SYSFS
-       brc_sysfs_add_dp(dp);
-#endif
 
-       return 0;
-}
+       /* Re-parse message.  Can't fail, since it parsed correctly once
+        * already. */
+       error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
+                           attrs, BRC_GENL_A_MAX, brc_genl_policy);
+       WARN_ON(error);
 
-int brc_del_dp(struct datapath *dp) 
-{
-#ifdef SUPPORT_SYSFS
-       brc_sysfs_del_dp(dp);
-#endif
-       module_put(THIS_MODULE);
+       return reply;
 
-       return 0;
+error:
+       mutex_unlock(&brc_serial);
+       return ERR_PTR(error);
 }
 
-static int 
-__init brc_init(void)
+static int __init brc_init(void)
 {
-       int i;
        int err;
 
        printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
 
-       rcu_read_lock();
-       for (i=0; i<ODP_MAX; i++) {
-               if (get_dp(i)) {
-                       rcu_read_unlock();
-                       printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
-                       return -EEXIST;
-               }
-       }
-       rcu_read_unlock();
-
        /* Set the bridge ioctl handler */
        brioctl_set(brc_ioctl_deviceless_stub);
 
        /* Set the openvswitch_mod device ioctl handler */
        dp_ioctl_hook = brc_dev_ioctl;
 
-       /* Register hooks for datapath adds and deletes */
-       dp_add_dp_hook = brc_add_dp;
-       dp_del_dp_hook = brc_del_dp;
-
-       /* Register hooks for interface adds and deletes */
-#ifdef SUPPORT_SYSFS
-       dp_add_if_hook = brc_sysfs_add_if;
-       dp_del_if_hook = brc_sysfs_del_if;
-#endif
-
        /* Randomize the initial sequence number.  This is not a security
         * feature; it only helps avoid crossed wires between userspace and
         * the kernel when the module is unloaded and reloaded. */
@@ -470,22 +514,11 @@ __init brc_init(void)
 
        /* Register generic netlink family to communicate changes to
         * userspace. */
-       err = genl_register_family(&brc_genl_family);
+       err = genl_register_family_with_ops(&brc_genl_family,
+                                           brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
        if (err)
                goto error;
 
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
-       if (err != 0) 
-               goto err_unregister;
-
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
-       if (err != 0) 
-               goto err_unregister;
-
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
-       if (err != 0) 
-               goto err_unregister;
-
        strcpy(brc_mc_group.name, "brcompat");
        err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
        if (err < 0)
@@ -496,27 +529,17 @@ __init brc_init(void)
 err_unregister:
        genl_unregister_family(&brc_genl_family);
 error:
-       printk(KERN_EMERG "brcompat: failed to install!");
+       pr_emerg("failed to install!\n");
        return err;
 }
 
-static void 
-brc_cleanup(void)
+static void brc_cleanup(void)
 {
-       /* Unregister hooks for datapath adds and deletes */
-       dp_add_dp_hook = NULL;
-       dp_del_dp_hook = NULL;
-       
-       /* Unregister hooks for interface adds and deletes */
-       dp_add_if_hook = NULL;
-       dp_del_if_hook = NULL;
-
        /* Unregister ioctl hooks */
        dp_ioctl_hook = NULL;
        brioctl_set(NULL);
 
        genl_unregister_family(&brc_genl_family);
-       brc_procfs_exit();
 }
 
 module_init(brc_init);