meta-flow: Correctly set destination MAC in mf_set_flow_value().
[sliver-openvswitch.git] / datapath / brcompat.c
index d9255e6..339b5dc 100644 (file)
@@ -1,28 +1,35 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
- * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007-2012 Nicira Networks.
  *
- * Significant portions of this file may be copied from parts of the Linux
- * kernel, by Linus Torvalds and others.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
 #include <linux/kernel.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/completion.h>
-#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
-#include <linux/rculist.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <net/genetlink.h>
 
-#include "compat.h"
 #include "openvswitch/brcompat-netlink.h"
-#include "brc_procfs.h"
-#include "brc_sysfs.h"
 #include "datapath.h"
-#include "dp_dev.h"
 
 static struct genl_family brc_genl_family;
 static struct genl_multicast_group brc_mc_group;
@@ -43,48 +50,20 @@ static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
 static struct sk_buff *brc_reply;    /* Reply from userspace. */
 static u32 brc_seq;                 /* Sequence number for current op. */
 
-static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
+static struct sk_buff *brc_send_command(struct sk_buff *,
+                                       struct nlattr **attrs);
 static int brc_send_simple_command(struct sk_buff *);
 
-static int
-get_dp_ifindices(int *indices, int num)
-{
-       int i, index = 0;
-
-       rcu_read_lock();
-       for (i=0; i < ODP_MAX && index < num; i++) {
-               struct datapath *dp = get_dp(i);
-               if (!dp)
-                       continue;
-               indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
-       }
-       rcu_read_unlock();
-
-       return index;
-}
-
-static void
-get_port_ifindices(struct datapath *dp, int *ifindices, int num)
-{
-       struct net_bridge_port *p;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu (p, &dp->port_list, node) {
-               if (p->port_no < num)
-                       ifindices[p->port_no] = p->dev->ifindex;
-       }
-       rcu_read_unlock();
-}
-
-static struct sk_buff *
-brc_make_request(int op, const char *bridge, const char *port)
+static struct sk_buff *brc_make_request(int op, const char *bridge,
+                                       const char *port)
 {
        struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                goto error;
 
        genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op);
-       NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+       if (bridge)
+               NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
        if (port)
                NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
        return skb;
@@ -115,6 +94,9 @@ static int brc_add_del_bridge(char __user *uname, int add)
        struct sk_buff *request;
        char name[IFNAMSIZ];
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (copy_from_user(name, uname, IFNAMSIZ))
                return -EFAULT;
 
@@ -127,29 +109,59 @@ static int brc_add_del_bridge(char __user *uname, int add)
        return brc_send_simple_command(request);
 }
 
-static int brc_get_bridges(int __user *uindices, int n)
+static int brc_get_indices(int op, const char *br_name,
+                          int __user *uindices, int n)
 {
+       struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+       struct sk_buff *request, *reply;
        int *indices;
        int ret;
+       int len;
 
+       if (n < 0)
+               return -EINVAL;
        if (n >= 2048)
                return -ENOMEM;
 
-       indices = kcalloc(n, sizeof(int), GFP_KERNEL);
-       if (indices == NULL)
+       request = brc_make_request(op, br_name, NULL);
+       if (!request)
                return -ENOMEM;
 
-       n = get_dp_ifindices(indices, n);
+       reply = brc_send_command(request, attrs);
+       ret = PTR_ERR(reply);
+       if (IS_ERR(reply))
+               goto exit;
+
+       ret = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+       if (ret < 0)
+               goto exit_free_skb;
+
+       ret = -EINVAL;
+       if (!attrs[BRC_GENL_A_IFINDEXES])
+               goto exit_free_skb;
 
+       len = nla_len(attrs[BRC_GENL_A_IFINDEXES]);
+       indices = nla_data(attrs[BRC_GENL_A_IFINDEXES]);
+       if (len % sizeof(int))
+               goto exit_free_skb;
+
+       n = min_t(int, n, len / sizeof(int));
        ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
 
-       kfree(indices);
+exit_free_skb:
+       kfree_skb(reply);
+exit:
        return ret;
 }
 
+/* Called with br_ioctl_mutex. */
+static int brc_get_bridges(int __user *uindices, int n)
+{
+       return brc_get_indices(BRC_GENL_C_GET_BRIDGES, NULL, uindices, n);
+}
+
 /* Legacy deviceless bridge ioctl's.  Called with br_ioctl_mutex. */
-static int
-old_deviceless(void __user *uarg)
+static int old_deviceless(void __user *uarg)
 {
        unsigned long args[3];
 
@@ -191,13 +203,15 @@ brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
        return -EOPNOTSUPP;
 }
 
-static int
-brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
+static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
 {
        struct sk_buff *request;
        struct net_device *port;
        int err;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        port = __dev_get_by_index(&init_net, port_ifindex);
        if (!port)
                return -EINVAL;
@@ -216,18 +230,17 @@ brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
        return err;
 }
 
-static int
-brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
+static int brc_get_bridge_info(struct net_device *dev,
+                              struct __bridge_info __user *ub)
 {
        struct __bridge_info b;
-       u64 id = 0;
-       int i;
 
        memset(&b, 0, sizeof(struct __bridge_info));
 
-       for (i=0; i<ETH_ALEN; i++)
-               id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
-       b.bridge_id = cpu_to_be64(id);
+       /* First two bytes are the priority, which we should skip.  This comes
+        * from struct bridge_id in br_private.h, which is unavailable to us.
+        */
+       memcpy((u8 *)&b.bridge_id + 2, dev->dev_addr, ETH_ALEN);
        b.stp_enabled = 0;
 
        if (copy_to_user(ub, &b, sizeof(struct __bridge_info)))
@@ -236,29 +249,17 @@ brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
        return 0;
 }
 
-static int
-brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
+static int brc_get_port_list(struct net_device *dev, int __user *uindices,
+                            int num)
 {
-       struct dp_dev *dp_dev = netdev_priv(dev);
-       struct datapath *dp = dp_dev->dp;
-       int *indices;
-
-       if (num < 0)
-               return -EINVAL;
-       if (num == 0)
-               num = 256;
-       if (num > DP_MAX_PORTS)
-               num = DP_MAX_PORTS;
+       int retval;
 
-       indices = kcalloc(num, sizeof(int), GFP_KERNEL);
-       if (indices == NULL)
-               return -ENOMEM;
+       rtnl_unlock();
+       retval = brc_get_indices(BRC_GENL_C_GET_PORTS, dev->name,
+                                uindices, num);
+       rtnl_lock();
 
-       get_port_ifindices(dp, indices, num);
-       if (copy_to_user(uindices, indices, num * sizeof(int)))
-               num = -EFAULT;
-       kfree(indices);
-       return num;
+       return retval;
 }
 
 /*
@@ -268,7 +269,7 @@ brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
  *            (limited to a page for sanity)
  * offset  -- number of records to skip
  */
-static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf, 
+static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
                               unsigned long maxnum, unsigned long offset)
 {
        struct nlattr *attrs[BRC_GENL_A_MAX + 1];
@@ -320,8 +321,7 @@ nla_put_failure:
 }
 
 /* Legacy ioctl's through SIOCDEVPRIVATE.  Called with rtnl_lock. */
-static int
-old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        unsigned long args[4];
 
@@ -349,24 +349,23 @@ old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 /* Called with the rtnl_lock. */
-static int
-brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        int err;
 
        switch (cmd) {
-               case SIOCDEVPRIVATE:
-                       err = old_dev_ioctl(dev, rq, cmd);
-                       break;
-
-               case SIOCBRADDIF:
-                       return brc_add_del_port(dev, rq->ifr_ifindex, 1);
-               case SIOCBRDELIF:
-                       return brc_add_del_port(dev, rq->ifr_ifindex, 0);
-
-               default:
-                       err = -EOPNOTSUPP;
-                       break;
+       case SIOCDEVPRIVATE:
+               err = old_dev_ioctl(dev, rq, cmd);
+               break;
+
+       case SIOCBRADDIF:
+               return brc_add_del_port(dev, rq->ifr_ifindex, 1);
+       case SIOCBRDELIF:
+               return brc_add_del_port(dev, rq->ifr_ifindex, 0);
+
+       default:
+               err = -EOPNOTSUPP;
+               break;
        }
 
        return err;
@@ -388,7 +387,7 @@ static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
        void *data;
 
        ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!ans_skb) 
+       if (!ans_skb)
                return -ENOMEM;
 
        data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
@@ -408,27 +407,13 @@ nla_put_failure:
        return err;
 }
 
-static struct genl_ops brc_genl_ops_query_dp = {
-       .cmd = BRC_GENL_C_QUERY_MC,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = NULL,
-       .doit = brc_genl_query,
-       .dumpit = NULL
-};
-
 /* Attribute policy: what each attribute may contain.  */
 static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
        [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
-
-       [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
-       [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
-       [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
-
        [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
 };
 
-static int
-brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
+static int brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
 {
        unsigned long int flags;
        int err;
@@ -444,8 +429,7 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
        if (brc_seq == info->snd_seq) {
                brc_seq++;
 
-               if (brc_reply)
-                       kfree_skb(brc_reply);
+               kfree_skb(brc_reply);
                brc_reply = skb;
 
                complete(&brc_done);
@@ -459,23 +443,21 @@ brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
        return err;
 }
 
-static struct genl_ops brc_genl_ops_dp_result = {
-       .cmd = BRC_GENL_C_DP_RESULT,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = brc_genl_policy,
-       .doit = brc_genl_dp_result,
-       .dumpit = NULL
-};
-
-static struct genl_ops brc_genl_ops_set_proc = {
-       .cmd = BRC_GENL_C_SET_PROC,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privelege. */
-       .policy = brc_genl_policy,
-       .doit = brc_genl_set_proc,
-       .dumpit = NULL
+static struct genl_ops brc_genl_ops[] = {
+       { .cmd = BRC_GENL_C_QUERY_MC,
+         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+         .policy = NULL,
+         .doit = brc_genl_query,
+       },
+       { .cmd = BRC_GENL_C_DP_RESULT,
+         .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+         .policy = brc_genl_policy,
+         .doit = brc_genl_dp_result,
+       },
 };
 
-static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr **attrs)
+static struct sk_buff *brc_send_command(struct sk_buff *request,
+                                       struct nlattr **attrs)
 {
        unsigned long int flags;
        struct sk_buff *reply;
@@ -499,8 +481,10 @@ static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr *
 
        /* Wait for reply. */
        error = -ETIMEDOUT;
-       if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
+       if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) {
+               pr_warn("timed out waiting for userspace\n");
                goto error;
+       }
 
        /* Grab reply. */
        spin_lock_irqsave(&brc_lock, flags);
@@ -523,54 +507,17 @@ error:
        return ERR_PTR(error);
 }
 
-int brc_add_dp(struct datapath *dp)
-{
-       if (!try_module_get(THIS_MODULE))
-               return -ENODEV;
-       brc_sysfs_add_dp(dp);
-
-       return 0;
-}
-
-int brc_del_dp(struct datapath *dp) 
-{
-       brc_sysfs_del_dp(dp);
-       module_put(THIS_MODULE);
-
-       return 0;
-}
-
-static int 
-__init brc_init(void)
+static int __init brc_init(void)
 {
-       int i;
        int err;
 
-       printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
-
-       rcu_read_lock();
-       for (i=0; i<ODP_MAX; i++) {
-               if (get_dp(i)) {
-                       rcu_read_unlock();
-                       printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
-                       return -EEXIST;
-               }
-       }
-       rcu_read_unlock();
+       pr_info("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
 
        /* Set the bridge ioctl handler */
        brioctl_set(brc_ioctl_deviceless_stub);
 
        /* Set the openvswitch_mod device ioctl handler */
-       dp_ioctl_hook = brc_dev_ioctl;
-
-       /* Register hooks for datapath adds and deletes */
-       dp_add_dp_hook = brc_add_dp;
-       dp_del_dp_hook = brc_del_dp;
-
-       /* Register hooks for interface adds and deletes */
-       dp_add_if_hook = brc_sysfs_add_if;
-       dp_del_if_hook = brc_sysfs_del_if;
+       ovs_dp_ioctl_hook = brc_dev_ioctl;
 
        /* Randomize the initial sequence number.  This is not a security
         * feature; it only helps avoid crossed wires between userspace and
@@ -579,22 +526,11 @@ __init brc_init(void)
 
        /* Register generic netlink family to communicate changes to
         * userspace. */
-       err = genl_register_family(&brc_genl_family);
+       err = genl_register_family_with_ops(&brc_genl_family,
+                                           brc_genl_ops, ARRAY_SIZE(brc_genl_ops));
        if (err)
                goto error;
 
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
-       if (err != 0) 
-               goto err_unregister;
-
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
-       if (err != 0) 
-               goto err_unregister;
-
-       err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
-       if (err != 0) 
-               goto err_unregister;
-
        strcpy(brc_mc_group.name, "brcompat");
        err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
        if (err < 0)
@@ -605,27 +541,17 @@ __init brc_init(void)
 err_unregister:
        genl_unregister_family(&brc_genl_family);
 error:
-       printk(KERN_EMERG "brcompat: failed to install!");
+       pr_emerg("failed to install!\n");
        return err;
 }
 
-static void 
-brc_cleanup(void)
+static void brc_cleanup(void)
 {
-       /* Unregister hooks for datapath adds and deletes */
-       dp_add_dp_hook = NULL;
-       dp_del_dp_hook = NULL;
-       
-       /* Unregister hooks for interface adds and deletes */
-       dp_add_if_hook = NULL;
-       dp_del_if_hook = NULL;
-
        /* Unregister ioctl hooks */
-       dp_ioctl_hook = NULL;
+       ovs_dp_ioctl_hook = NULL;
        brioctl_set(NULL);
 
        genl_unregister_family(&brc_genl_family);
-       brc_procfs_exit();
 }
 
 module_init(brc_init);
@@ -634,3 +560,12 @@ module_exit(brc_cleanup);
 MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
 MODULE_AUTHOR("Nicira Networks");
 MODULE_LICENSE("GPL");
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+/*
+ * In kernels 2.6.36 and later, Open vSwitch can safely coexist with
+ * the Linux bridge module, but it does not make sense to load both bridge and
+ * brcompat_mod, so this prevents it.
+ */
+BRIDGE_MUTUAL_EXCLUSION;
+#endif