Implement OFPT_HELLO simple version negotiation.
[sliver-openvswitch.git] / datapath / datapath.c
index 55c2c61..8eb0660 100644 (file)
@@ -28,6 +28,8 @@
 #include <linux/netfilter_bridge.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/workqueue.h>
 
 #include "openflow-netlink.h"
 #include "datapath.h"
 #include "dp_dev.h"
 #include "forward.h"
 #include "flow.h"
-#include "datapath_t.h"
 
 #include "compat.h"
 
 
+/* Strings to describe the manufacturer, hardware, and software.  This data
+ * is queryable through the switch description stats message. */
+static char mfr_desc[DESC_STR_LEN] = "Nicira Networks";
+static char hw_desc[DESC_STR_LEN] = "Reference Linux Kernel Module";
+static char sw_desc[DESC_STR_LEN] = VERSION;
+static char serial_num[SERIAL_NUM_LEN] = "None";
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+module_param_string(mfr_desc, mfr_desc, sizeof mfr_desc, 0444);
+module_param_string(hw_desc, hw_desc, sizeof hw_desc, 0444);
+module_param_string(sw_desc, sw_desc, sizeof sw_desc, 0444);
+module_param_string(serial_num, serial_num, sizeof serial_num, 0444);
+#else
+MODULE_PARM(mfr_desc, "s");
+MODULE_PARM(hw_desc, "s");
+MODULE_PARM(sw_desc, "s");
+MODULE_PARM(serial_num, "s");
+#endif
+
+
 /* Number of milliseconds between runs of the maintenance thread. */
 #define MAINT_SLEEP_MSECS 1000
 
-#define BRIDGE_PORT_NO_FLOOD   0x00000001 
-
 #define UINT32_MAX                       4294967295U
 #define UINT16_MAX                       65535
 #define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
 
-struct net_bridge_port {
-       u16     port_no;
-       u32 flags;
-       struct datapath *dp;
-       struct net_device *dev;
-       struct list_head node; /* Element in datapath.ports. */
-};
-
 static struct genl_family dp_genl_family;
 static struct genl_multicast_group mc_group;
 
 /* It's hard to imagine wanting more than one datapath, but... */
 #define DP_MAX 32
 
-/* datapaths.  Protected on the read side by rcu_read_lock, on the write side
- * by dp_mutex.
+/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
+ * by dp_mutex.  dp_mutex is almost completely redundant with genl_mutex
+ * maintained by the Generic Netlink code, but the timeout path needs mutual
+ * exclusion too.
  *
  * It is safe to access the datapath and net_bridge_port structures with just
- * the dp_mutex, but to access the chain you need to take the rcu_read_lock
- * also (because dp_mutex doesn't prevent flows from being destroyed).
+ * dp_mutex.
  */
 static struct datapath *dps[DP_MAX];
-static DEFINE_MUTEX(dp_mutex);
+DEFINE_MUTEX(dp_mutex);
+EXPORT_SYMBOL(dp_mutex);
 
 static int dp_maint_func(void *data);
+static int update_port_status(struct net_bridge_port *p);
 static int send_port_status(struct net_bridge_port *p, uint8_t status);
 static int dp_genl_openflow_done(struct netlink_callback *);
 static struct net_bridge_port *new_nbp(struct datapath *,
@@ -242,9 +256,7 @@ uint64_t gen_datapath_id(uint16_t dp_idx)
 }
 
 /* Creates a new datapath numbered 'dp_idx'.  Returns 0 for success or a
- * negative error code.
- *
- * Not called with any locks. */
+ * negative error code. */
 static int new_dp(int dp_idx)
 {
        struct datapath *dp;
@@ -256,9 +268,8 @@ static int new_dp(int dp_idx)
        if (!try_module_get(THIS_MODULE))
                return -ENODEV;
 
-       mutex_lock(&dp_mutex);
-       dp = rcu_dereference(dps[dp_idx]);
-       if (dp != NULL) {
+       /* Exit early if a datapath with that number already exists. */
+       if (dps[dp_idx]) {
                err = -EEXIST;
                goto err_unlock;
        }
@@ -293,8 +304,7 @@ static int new_dp(int dp_idx)
        if (IS_ERR(dp->dp_task))
                goto err_destroy_chain;
 
-       rcu_assign_pointer(dps[dp_idx], dp);
-       mutex_unlock(&dp_mutex);
+       dps[dp_idx] = dp;
 
        return 0;
 
@@ -307,12 +317,11 @@ err_destroy_dp_dev:
 err_free_dp:
        kfree(dp);
 err_unlock:
-       mutex_unlock(&dp_mutex);
        module_put(THIS_MODULE);
                return err;
 }
 
-/* Find and return a free port number under 'dp'.  Called under dp_mutex. */
+/* Find and return a free port number under 'dp'. */
 static int find_portno(struct datapath *dp)
 {
        int i;
@@ -341,6 +350,8 @@ static struct net_bridge_port *new_nbp(struct datapath *dp,
        p->dp = dp;
        p->dev = dev;
        p->port_no = port_no;
+       spin_lock_init(&p->lock);
+       INIT_WORK(&p->port_task, NULL);
        if (port_no != OFPP_LOCAL)
                rcu_assign_pointer(dev->br_port, p);
        if (port_no < OFPP_MAX)
@@ -350,7 +361,6 @@ static struct net_bridge_port *new_nbp(struct datapath *dp,
        return p;
 }
 
-/* Called with dp_mutex. */
 int add_switch_port(struct datapath *dp, struct net_device *dev)
 {
        struct net_bridge_port *p;
@@ -368,17 +378,19 @@ int add_switch_port(struct datapath *dp, struct net_device *dev)
        if (IS_ERR(p))
                return PTR_ERR(p);
 
+       update_port_status(p);
+
        /* Notify the ctlpath that this port has been added */
        send_port_status(p, OFPPR_ADD);
 
        return 0;
 }
 
-/* Delete 'p' from switch.
- * Called with dp_mutex. */
+/* Delete 'p' from switch. */
 static int del_switch_port(struct net_bridge_port *p)
 {
        /* First drop references to device. */
+       cancel_work_sync(&p->port_task);
        rtnl_lock();
        dev_set_promiscuity(p->dev, -1);
        rtnl_unlock();
@@ -399,7 +411,6 @@ static int del_switch_port(struct net_bridge_port *p)
        return 0;
 }
 
-/* Called with dp_mutex. */
 static void del_dp(struct datapath *dp)
 {
        struct net_bridge_port *p, *n;
@@ -432,6 +443,16 @@ static int dp_maint_func(void *data)
        struct datapath *dp = (struct datapath *) data;
 
        while (!kthread_should_stop()) {
+               struct net_bridge_port *p;
+
+               /* Check if port status has changed */
+               rcu_read_lock();
+               list_for_each_entry_rcu (p, &dp->port_list, node) 
+                       if (update_port_status(p)) 
+                               send_port_status(p, OFPPR_MOD);
+               rcu_read_unlock();
+
+               /* Timeout old entries */
                chain_timeout(dp->chain);
                msleep_interruptible(MAINT_SLEEP_MSECS);
        }
@@ -444,7 +465,7 @@ do_port_input(struct net_bridge_port *p, struct sk_buff *skb)
 {
        /* Push the Ethernet header back on. */
        skb_push(skb, ETH_HLEN);
-       fwd_port_input(p->dp->chain, skb, p->port_no);
+       fwd_port_input(p->dp->chain, skb, p);
 }
 
 /*
@@ -467,8 +488,6 @@ static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
 }
 #else
 /* NB: This has only been tested on 2.4.35 */
-
-/* Called without any locks (?) */
 static void dp_frame_hook(struct sk_buff *skb)
 {
        struct net_bridge_port *p = skb->dev->br_port;
@@ -498,7 +517,7 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 static int
 output_all(struct datapath *dp, struct sk_buff *skb, int flood)
 {
-       u32 disable = flood ? BRIDGE_PORT_NO_FLOOD : 0;
+       u32 disable = flood ? OFPPFL_NO_FLOOD : 0;
        struct net_bridge_port *p;
        int prev_port = -1;
 
@@ -511,12 +530,12 @@ output_all(struct datapath *dp, struct sk_buff *skb, int flood)
                                kfree_skb(skb);
                                return -ENOMEM;
                        }
-                       dp_output_port(dp, clone, prev_port); 
+                       dp_output_port(dp, clone, prev_port, 0); 
                }
                prev_port = p->port_no;
        }
        if (prev_port != -1)
-               dp_output_port(dp, skb, prev_port);
+               dp_output_port(dp, skb, prev_port, 0);
        else
                kfree_skb(skb);
 
@@ -538,50 +557,83 @@ int dp_set_origin(struct datapath *dp, uint16_t in_port,
        return -ENOENT;
 }
 
+/* Transmits 'skb' on the device already stored in skb->dev.
+ *
+ * Returns the value of skb->len sampled before transmission, or -E2BIG
+ * (after freeing 'skb') if the packet is longer than the device MTU. */
+static int xmit_skb(struct sk_buff *skb)
+{
+       int len = skb->len;
+       if (packet_length(skb) > skb->dev->mtu) {
+               /* Rate-limit the message, as the other diagnostics in this
+                * file do, so a stream of oversized packets cannot flood the
+                * kernel log.  packet_length() returns an unsigned value, so
+                * print with %u (assumes dev->mtu is unsigned as well). */
+               if (net_ratelimit())
+                       printk("dropped over-mtu packet: %u > %u\n",
+                              packet_length(skb), skb->dev->mtu);
+               kfree_skb(skb);
+               return -E2BIG;
+       }
+
+       /* 'len' was read up front so that 'skb' is not touched again after
+        * it has been handed to dev_queue_xmit(). */
+       dev_queue_xmit(skb);
+
+       return len;
+}
+
 /* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
  */
-int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
+int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port,
+                  int ignore_no_fwd)
 {
        BUG_ON(!skb);
-       if (out_port == OFPP_FLOOD)
+       switch (out_port){
+       case OFPP_IN_PORT:
+               /* Send it out the port it came in on, which is already set in
+                * the skb. */
+               if (!skb->dev) {
+                       if (net_ratelimit())
+                               printk("skb device not set forwarding to in_port\n");
+                       /* An sk_buff has to be released with kfree_skb(), as
+                        * every other drop path in this function does; plain
+                        * kfree() is not the matching release routine. */
+                       kfree_skb(skb);
+                       return -ESRCH;
+               }
+               return xmit_skb(skb);
+               
+       case OFPP_TABLE: {
+               int retval = run_flow_through_tables(dp->chain, skb,
+                                                    skb->dev->br_port);
+               if (retval)
+                       kfree_skb(skb);
+               return retval;
+       }
+
+       case OFPP_FLOOD:
                return output_all(dp, skb, 1);
-       else if (out_port == OFPP_ALL)
+
+       case OFPP_ALL:
                return output_all(dp, skb, 0);
-       else if (out_port == OFPP_CONTROLLER)
+
+       case OFPP_CONTROLLER:
                return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
                                                  OFPR_ACTION);
-       else if (out_port == OFPP_TABLE) {
-               struct net_bridge_port *p = skb->dev->br_port;
-               struct sw_flow_key key;
-               struct sw_flow *flow;
-
-               flow_extract(skb, p ? p->port_no : OFPP_LOCAL, &key);
-               flow = chain_lookup(dp->chain, &key);
-               if (likely(flow != NULL)) {
-                       flow_used(flow, skb);
-                       execute_actions(dp, skb, &key, flow->actions, flow->n_actions);
-                       return 0;
-               }
-               return -ESRCH;
-       } else if (out_port == OFPP_LOCAL) {
+
+       case OFPP_LOCAL: {
                struct net_device *dev = dp->netdev;
                return dev ? dp_dev_recv(dev, skb) : -ESRCH;
-       } else if (out_port >= 0 && out_port < OFPP_MAX) {
+       }
+
+       case 0 ... OFPP_MAX-1: {
                struct net_bridge_port *p = dp->ports[out_port];
-               int len = skb->len;
                if (p == NULL)
                        goto bad_port;
-               skb->dev = p->dev; 
-               if (packet_length(skb) > skb->dev->mtu) {
-                       printk("dropped over-mtu packet: %d > %d\n",
-                              packet_length(skb), skb->dev->mtu);
+               if (p->dev == skb->dev) {
+                       /* To send to the input port, must use OFPP_IN_PORT */
                        kfree_skb(skb);
-                       return -E2BIG;
+                       if (net_ratelimit())
+                               printk("can't directly forward to input port\n");
+                       return -EINVAL;
                }
+               if (p->flags & OFPPFL_NO_FWD && !ignore_no_fwd) {
+                       kfree_skb(skb);
+                       return 0;
+               }
+               skb->dev = p->dev; 
+               return xmit_skb(skb);
+       }
 
-               dev_queue_xmit(skb);
-
-               return len;
+       default:
+               goto bad_port;
        }
 
 bad_port:
@@ -635,14 +687,26 @@ out:
 
 static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
 {
+       unsigned long flags;
        desc->port_no = htons(p->port_no);
        strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
        desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
        memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
-       desc->flags = htonl(p->flags);
+       desc->flags = 0;
        desc->features = 0;
        desc->speed = 0;
 
+       if (p->port_no < 255) {
+               /* FIXME: this is a layering violation and should really be
+                * done in the secchan, as with OFPC_STP in
+                * OFP_SUPPORTED_CAPABILITIES. */
+               desc->features |= OFPPF_STP;
+       }
+
+       spin_lock_irqsave(&p->lock, flags);
+       desc->flags = htonl(p->flags | p->status);
+       spin_unlock_irqrestore(&p->lock, flags);
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
        if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
                struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
@@ -664,11 +728,11 @@ static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
                        if (ecmd.supported & SUPPORTED_10000baseT_Full)
                                desc->features |= OFPPF_10GB_FD;
 
-                       desc->features = htonl(desc->features);
                        desc->speed = htonl(ecmd.speed);
                }
        }
 #endif
+       desc->features = htonl(desc->features);
 }
 
 static int 
@@ -677,15 +741,13 @@ fill_features_reply(struct datapath *dp, struct ofp_switch_features *ofr)
        struct net_bridge_port *p;
        int port_count = 0;
 
-       ofr->datapath_id    = cpu_to_be64(dp->id); 
+       ofr->datapath_id  = cpu_to_be64(dp->id); 
 
-       ofr->n_exact        = htonl(2 * TABLE_HASH_MAX_FLOWS);
-       ofr->n_compression  = 0;                                           /* Not supported */
-       ofr->n_general      = htonl(TABLE_LINEAR_MAX_FLOWS);
-       ofr->buffer_mb      = htonl(UINT32_MAX);
-       ofr->n_buffers      = htonl(N_PKT_BUFFERS);
-       ofr->capabilities   = htonl(OFP_SUPPORTED_CAPABILITIES);
-       ofr->actions        = htonl(OFP_SUPPORTED_ACTIONS);
+       ofr->n_buffers    = htonl(N_PKT_BUFFERS);
+       ofr->n_tables     = dp->chain->n_tables;
+       ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES);
+       ofr->actions      = htonl(OFP_SUPPORTED_ACTIONS);
+       memset(ofr->pad, 0, sizeof ofr->pad);
 
        list_for_each_entry_rcu (p, &dp->port_list, node) {
                fill_port_desc(p, &ofr->ports[port_count]);
@@ -737,19 +799,128 @@ dp_send_config_reply(struct datapath *dp, const struct sender *sender)
 }
 
 int
-dp_update_port_flags(struct datapath *dp, const struct ofp_phy_port *opp)
+dp_send_hello(struct datapath *dp, const struct sender *sender,
+             const struct ofp_header *request)
+{
+       struct ofp_header *hello;
+       struct sk_buff *skb;
+
+       /* A peer announcing a version older than OFP_VERSION is rejected:
+        * send it an OFPET_HELLO_FAILED error and fail the request. */
+       if (request->version < OFP_VERSION) {
+               char msg[64];
+
+               sprintf(msg, "Only version 0x%02x supported", OFP_VERSION);
+               dp_send_error_msg(dp, sender, OFPET_HELLO_FAILED,
+                                 OFPHFC_INCOMPATIBLE, msg, strlen(msg));
+               return -EINVAL;
+       }
+
+       /* Otherwise reply with an OFPT_HELLO message sized for just the
+        * OpenFlow header. */
+       hello = alloc_openflow_skb(dp, sizeof *hello, OFPT_HELLO, sender,
+                                  &skb);
+       if (hello == NULL)
+               return -ENOMEM;
+
+       return send_openflow_skb(skb, sender);
+}
+
+/* Workqueue callback that disables the interface behind a bridge port by
+ * clearing IFF_UP on its device.  'work' is the port_task member embedded in
+ * the struct net_bridge_port being changed.
+ *
+ * OFPPFL_PORT_DOWN is recorded in p->status only if the device change
+ * succeeds, and the update is made under p->lock like the other writers of
+ * p->status in this file. */
+static void
+down_port_cb(struct work_struct *work)
 {
+       struct net_bridge_port *p = container_of(work, struct net_bridge_port,
+                       port_task);
+       unsigned long flags;
+       int err;
+
+       rtnl_lock();
+       err = dev_change_flags(p->dev, p->dev->flags & ~IFF_UP);
+       rtnl_unlock();
+
+       if (err < 0) {
+               /* Leave p->status alone: the interface did not go down. */
+               if (net_ratelimit())
+                       printk("problem bringing down port %s\n", p->dev->name);
+               return;
+       }
+
+       spin_lock_irqsave(&p->lock, flags);
+       p->status |= OFPPFL_PORT_DOWN;
+       spin_unlock_irqrestore(&p->lock, flags);
+}
+
+/* Workqueue callback that enables the interface behind a bridge port by
+ * setting IFF_UP on its device.  'work' is the port_task member embedded in
+ * the struct net_bridge_port being changed.
+ *
+ * OFPPFL_PORT_DOWN is cleared from p->status only if the device change
+ * succeeds, and the update is made under p->lock like the other writers of
+ * p->status in this file. */
+static void
+up_port_cb(struct work_struct *work)
+{
+       struct net_bridge_port *p = container_of(work, struct net_bridge_port,
+                       port_task);
+       unsigned long flags;
+       int err;
+
+       rtnl_lock();
+       err = dev_change_flags(p->dev, p->dev->flags | IFF_UP);
+       rtnl_unlock();
+
+       if (err < 0) {
+               /* Leave p->status alone: the interface did not come up. */
+               if (net_ratelimit())
+                       printk("problem bringing up port %s\n", p->dev->name);
+               return;
+       }
+
+       spin_lock_irqsave(&p->lock, flags);
+       p->status &= ~OFPPFL_PORT_DOWN;
+       spin_unlock_irqrestore(&p->lock, flags);
+}
+
+/* Applies the port modification request 'opm' to the matching port of 'dp'.
+ *
+ * The port is looked up by opm->desc.port_no (a regular port below OFPP_MAX,
+ * or the local port for OFPP_LOCAL).  Returns 0 on success, or -1 if no such
+ * port exists or its hardware address differs from the one in the request.
+ *
+ * Only the flag bits selected by opm->mask (restricted to PORT_FLAG_BITS) are
+ * rewritten in p->flags.  A requested change of OFPPFL_PORT_DOWN is not
+ * applied here; it is handed to down_port_cb()/up_port_cb() through the
+ * port's work item. */
+int
+dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
+{
+       unsigned long int flags;
+       const struct ofp_phy_port *opp = &opm->desc;
        int port_no = ntohs(opp->port_no);
        struct net_bridge_port *p = (port_no < OFPP_MAX ? dp->ports[port_no]
                                     : port_no == OFPP_LOCAL ? dp->local_port
                                     : NULL);
+       uint32_t flag_mask;
+
        /* Make sure the port id hasn't changed since this was sent */
        if (!p || memcmp(opp->hw_addr, p->dev->dev_addr, ETH_ALEN))
                return -1;
-       p->flags = htonl(opp->flags);
+
+       /* p->flags and p->status are read and written under p->lock. */
+       spin_lock_irqsave(&p->lock, flags);
+       flag_mask = ntohl(opm->mask) & PORT_FLAG_BITS;
+       if (flag_mask) {
+               /* Replace only the masked bits, keeping all others. */
+               p->flags &= ~flag_mask;
+               p->flags |= ntohl(opp->flags) & flag_mask;
+       }
+
+       /* Modifying the status of an interface requires taking a lock
+        * that cannot be done from here.  For this reason, we use a shared
+        * workqueue, which will cause it to be executed from a safer
+        * context. */
+       if (opm->mask & htonl(OFPPFL_PORT_DOWN)) {
+               /* Schedule work only when the requested state differs from
+                * the state currently recorded in p->status. */
+               if ((opp->flags & htonl(OFPPFL_PORT_DOWN))
+                   && (p->status & OFPPFL_PORT_DOWN) == 0) {
+                       PREPARE_WORK(&p->port_task, down_port_cb);
+                       schedule_work(&p->port_task);
+               } else if ((opp->flags & htonl(OFPPFL_PORT_DOWN)) == 0
+                          && (p->status & OFPPFL_PORT_DOWN)) {
+                       PREPARE_WORK(&p->port_task, up_port_cb);
+                       schedule_work(&p->port_task);
+               }
+       }
+       spin_unlock_irqrestore(&p->lock, flags);
+
        return 0;
 }
 
+/* Update the port status field of the bridge port from the current state of
+ * its network device: OFPPFL_PORT_DOWN mirrors the absence of IFF_UP and
+ * OFPPFL_LINK_DOWN mirrors the absence of carrier.  A non-zero return
+ * value indicates some field has changed.
+ *
+ * NB: Callers of this function may hold the RCU read lock, so any
+ * additional checks must not sleep.
+ */
+static int
+update_port_status(struct net_bridge_port *p)
+{
+       unsigned long int flags;
+       uint32_t orig_status;
+       int changed;
+
+       spin_lock_irqsave(&p->lock, flags);
+       orig_status = p->status;
+
+       if (p->dev->flags & IFF_UP)
+               p->status &= ~OFPPFL_PORT_DOWN;
+       else
+               p->status |= OFPPFL_PORT_DOWN;
+
+       if (netif_carrier_ok(p->dev))
+               p->status &= ~OFPPFL_LINK_DOWN;
+       else
+               p->status |= OFPPFL_LINK_DOWN;
+
+       /* Decide while still holding the lock: reading p->status after the
+        * unlock could observe a concurrent writer's value and report the
+        * wrong answer. */
+       changed = (orig_status != p->status);
+       spin_unlock_irqrestore(&p->lock, flags);
+
+       return changed;
+}
 
 static int
 send_port_status(struct net_bridge_port *p, uint8_t status)
@@ -769,11 +940,14 @@ send_port_status(struct net_bridge_port *p, uint8_t status)
 }
 
 int 
-dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
+dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow,
+                    enum ofp_flow_expired_reason reason)
 {
        struct sk_buff *skb;
        struct ofp_flow_expired *ofe;
-       unsigned long duration_j;
+
+       if (!(dp->flags & OFPC_SEND_FLOW_EXP))
+               return 0;
 
        ofe = alloc_openflow_skb(dp, sizeof *ofe, OFPT_FLOW_EXPIRED, 0, &skb);
        if (!ofe)
@@ -781,11 +955,12 @@ dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
 
        flow_fill_match(&ofe->match, &flow->key);
 
-       memset(ofe->pad, 0, sizeof ofe->pad);
        ofe->priority = htons(flow->priority);
+       ofe->reason = reason;
+       memset(ofe->pad, 0, sizeof ofe->pad);
 
-       duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
-       ofe->duration     = htonl(duration_j / HZ);
+       ofe->duration     = htonl((jiffies - flow->init_time) / HZ);
+       memset(ofe->pad2, 0, sizeof ofe->pad2);
        ofe->packet_count = cpu_to_be64(flow->packet_count);
        ofe->byte_count   = cpu_to_be64(flow->byte_count);
 
@@ -795,13 +970,13 @@ EXPORT_SYMBOL(dp_send_flow_expired);
 
 int
 dp_send_error_msg(struct datapath *dp, const struct sender *sender, 
-               uint16_t type, uint16_t code, const uint8_t *data, size_t len)
+               uint16_t type, uint16_t code, const void *data, size_t len)
 {
        struct sk_buff *skb;
        struct ofp_error_msg *oem;
 
 
-       oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR_MSG
+       oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR, 
                        sender, &skb);
        if (!oem)
                return -ENOMEM;
@@ -882,7 +1057,6 @@ static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[DP_GENL_A_DP_IDX])
                return -EINVAL;
 
-       mutex_lock(&dp_mutex);
        dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
        if (!dp)
                err = -ENOENT;
@@ -890,7 +1064,6 @@ static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
                del_dp(dp);
                err = 0;
        }
-       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -974,7 +1147,6 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
 
        /* Get datapath. */
-       mutex_lock(&dp_mutex);
        dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
        if (!dp) {
                err = -ENOENT;
@@ -1003,7 +1175,6 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
 out_put:
        dev_put(port);
 out:
-       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -1034,26 +1205,22 @@ static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
                return -EINVAL;
 
-       rcu_read_lock();
        dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
-       if (!dp) {
-               err = -ENOENT;
-               goto out;
-       }
+       if (!dp)
+               return -ENOENT;
 
-       if (nla_len(va) < sizeof(struct ofp_header)) {
-               err = -EINVAL;
-               goto out;
-       }
+       if (nla_len(va) < sizeof(struct ofp_header))
+               return -EINVAL;
        oh = nla_data(va);
 
        sender.xid = oh->xid;
        sender.pid = info->snd_pid;
        sender.seq = info->snd_seq;
-       err = fwd_control_input(dp->chain, &sender, nla_data(va), nla_len(va));
 
-out:
-       rcu_read_unlock();
+       mutex_lock(&dp_mutex);
+       err = fwd_control_input(dp->chain, &sender,
+                               nla_data(va), nla_len(va));
+       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -1061,6 +1228,25 @@ static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
        [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
 };
 
+/* Stats dump handler for OFPST_DESC: writes the switch description strings
+ * (manufacturer, hardware, software, serial number) into 'body'.
+ *
+ * On entry '*body_len' is the space available in 'body'; on success it is set
+ * to the number of bytes written (sizeof(struct ofp_desc_stats)).  'dp' and
+ * 'state' are unused.  Returns 0 on success, or -ENOBUFS if 'body' is too
+ * small. */
+static int desc_stats_dump(struct datapath *dp, void *state,
+                           void *body, int *body_len)
+{
+       struct ofp_desc_stats *ods = body;
+       int n_bytes = sizeof *ods;
+
+       if (n_bytes > *body_len) {
+               return -ENOBUFS;
+       }
+       *body_len = n_bytes;
+
+       /* strncpy() zero-pads a short string but does not terminate one that
+        * fills the destination, so terminate every field explicitly, the
+        * same way fill_port_desc() does for the port name. */
+       strncpy(ods->mfr_desc, mfr_desc, sizeof ods->mfr_desc);
+       ods->mfr_desc[sizeof ods->mfr_desc - 1] = '\0';
+       strncpy(ods->hw_desc, hw_desc, sizeof ods->hw_desc);
+       ods->hw_desc[sizeof ods->hw_desc - 1] = '\0';
+       strncpy(ods->sw_desc, sw_desc, sizeof ods->sw_desc);
+       ods->sw_desc[sizeof ods->sw_desc - 1] = '\0';
+       strncpy(ods->serial_num, serial_num, sizeof ods->serial_num);
+       ods->serial_num[sizeof ods->serial_num - 1] = '\0';
+
+       return 0;
+}
+
 struct flow_stats_state {
        int table_idx;
        struct sw_table_position position;
@@ -1100,7 +1286,7 @@ static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
        ofs->length          = htons(length);
        ofs->table_id        = s->table_idx;
        ofs->pad             = 0;
-       ofs->match.wildcards = htons(flow->key.wildcards);
+       ofs->match.wildcards = htonl(flow->key.wildcards);
        ofs->match.in_port   = flow->key.in_port;
        memcpy(ofs->match.dl_src, flow->key.dl_src, ETH_ALEN);
        memcpy(ofs->match.dl_dst, flow->key.dl_dst, ETH_ALEN);
@@ -1109,14 +1295,16 @@ static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
        ofs->match.nw_src    = flow->key.nw_src;
        ofs->match.nw_dst    = flow->key.nw_dst;
        ofs->match.nw_proto  = flow->key.nw_proto;
-       memset(ofs->match.pad, 0, sizeof ofs->match.pad);
+       ofs->match.pad       = 0;
        ofs->match.tp_src    = flow->key.tp_src;
        ofs->match.tp_dst    = flow->key.tp_dst;
        ofs->duration        = htonl((jiffies - flow->init_time) / HZ);
+       ofs->priority        = htons(flow->priority);
+       ofs->idle_timeout    = htons(flow->idle_timeout);
+       ofs->hard_timeout    = htons(flow->hard_timeout);
+       memset(ofs->pad2, 0, sizeof ofs->pad2);
        ofs->packet_count    = cpu_to_be64(flow->packet_count);
        ofs->byte_count      = cpu_to_be64(flow->byte_count);
-       ofs->priority        = htons(flow->priority);
-       ofs->max_idle        = htons(flow->max_idle);
        memcpy(ofs->actions, flow->actions, actions_length);
 
        s->bytes_used += length;
@@ -1224,20 +1412,21 @@ static int table_stats_dump(struct datapath *dp, void *state,
                            void *body, int *body_len)
 {
        struct ofp_table_stats *ots;
-       int nbytes = dp->chain->n_tables * sizeof *ots;
+       int n_bytes = dp->chain->n_tables * sizeof *ots;
        int i;
-       if (nbytes > *body_len)
+       if (n_bytes > *body_len)
                return -ENOBUFS;
-       *body_len = nbytes;
+       *body_len = n_bytes;
        for (i = 0, ots = body; i < dp->chain->n_tables; i++, ots++) {
                struct sw_table_stats stats;
                dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
                strncpy(ots->name, stats.name, sizeof ots->name);
                ots->table_id = i;
+               ots->wildcards = htonl(stats.wildcards);
                memset(ots->pad, 0, sizeof ots->pad);
                ots->max_entries = htonl(stats.max_flows);
                ots->active_count = htonl(stats.n_flows);
-               ots->matched_count = cpu_to_be64(0); /* FIXME */
+               ots->matched_count = cpu_to_be64(stats.n_matched);
        }
        return 0;
 }
@@ -1279,10 +1468,18 @@ static int port_stats_dump(struct datapath *dp, void *state,
                stats = p->dev->get_stats(p->dev);
                ops->port_no = htons(p->port_no);
                memset(ops->pad, 0, sizeof ops->pad);
-               ops->rx_count = cpu_to_be64(stats->rx_packets);
-               ops->tx_count = cpu_to_be64(stats->tx_packets);
-               ops->drop_count = cpu_to_be64(stats->rx_dropped
-                                             + stats->tx_dropped);
+               ops->rx_packets   = cpu_to_be64(stats->rx_packets);
+               ops->tx_packets   = cpu_to_be64(stats->tx_packets);
+               ops->rx_bytes     = cpu_to_be64(stats->rx_bytes);
+               ops->tx_bytes     = cpu_to_be64(stats->tx_bytes);
+               ops->rx_dropped   = cpu_to_be64(stats->rx_dropped);
+               ops->tx_dropped   = cpu_to_be64(stats->tx_dropped);
+               ops->rx_errors    = cpu_to_be64(stats->rx_errors);
+               ops->tx_errors    = cpu_to_be64(stats->tx_errors);
+               ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors);
+               ops->rx_over_err  = cpu_to_be64(stats->rx_over_errors);
+               ops->rx_crc_err   = cpu_to_be64(stats->rx_crc_errors);
+               ops->collisions   = cpu_to_be64(stats->collisions);
                n_ports++;
                ops++;
        }
@@ -1322,6 +1519,13 @@ struct stats_type {
 };
 
 static const struct stats_type stats[] = {
+       [OFPST_DESC] = {
+               0,
+               0,
+               NULL,
+               desc_stats_dump,
+               NULL
+       },
        [OFPST_FLOW] = {
                sizeof(struct ofp_flow_stats_request),
                sizeof(struct ofp_flow_stats_request),
@@ -1369,7 +1573,8 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
         * struct genl_ops.  This kluge supports earlier versions also. */
        cb->done = dp_genl_openflow_done;
 
-       rcu_read_lock();
+       sender.pid = NETLINK_CB(cb->skb).pid;
+       sender.seq = cb->nlh->nlmsg_seq;
        if (!cb->args[0]) {
                struct nlattr *attrs[DP_GENL_A_MAX + 1];
                struct ofp_stats_request *rq;
@@ -1382,35 +1587,40 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                if (err < 0)
                        return err;
 
-               err = -EINVAL;
-
                if (!attrs[DP_GENL_A_DP_IDX])
-                       goto out;
+                       return -EINVAL;
                dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]);
                dp = dp_get(dp_idx);
-               if (!dp) {
-                       err = -ENOENT;
-                       goto out;
-               }
+               if (!dp)
+                       return -ENOENT;
 
                va = attrs[DP_GENL_A_OPENFLOW];
                len = nla_len(va);
                if (!va || len < sizeof *rq)
-                       goto out;
+                       return -EINVAL;
 
                rq = nla_data(va);
+               sender.xid = rq->header.xid;
                type = ntohs(rq->type);
-               if (rq->header.version != OFP_VERSION
-                   || rq->header.type != OFPT_STATS_REQUEST
-                   || ntohs(rq->header.length) != len
-                   || type >= ARRAY_SIZE(stats)
-                   || !stats[type].dump)
-                       goto out;
+               if (rq->header.version != OFP_VERSION) {
+                       dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST,
+                                         OFPBRC_BAD_VERSION, rq, len);
+                       return -EINVAL;
+               }
+               if (rq->header.type != OFPT_STATS_REQUEST
+                   || ntohs(rq->header.length) != len)
+                       return -EINVAL;
+
+               if (type >= ARRAY_SIZE(stats) || !stats[type].dump) {
+                       dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST,
+                                         OFPBRC_BAD_STAT, rq, len);
+                       return -EINVAL;
+               }
 
                s = &stats[type];
                body_len = len - offsetof(struct ofp_stats_request, body);
                if (body_len < s->min_body || body_len > s->max_body)
-                       goto out;
+                       return -EINVAL;
 
                cb->args[0] = 1;
                cb->args[1] = dp_idx;
@@ -1420,33 +1630,25 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                        void *state;
                        err = s->init(dp, rq->body, body_len, &state);
                        if (err)
-                               goto out;
+                               return err;
                        cb->args[4] = (long) state;
                }
        } else if (cb->args[0] == 1) {
+               sender.xid = cb->args[3];
                dp_idx = cb->args[1];
                s = &stats[cb->args[2]];
 
                dp = dp_get(dp_idx);
-               if (!dp) {
-                       err = -ENOENT;
-                       goto out;
-               }
+               if (!dp)
+                       return -ENOENT;
        } else {
-               err = 0;
-               goto out;
+               return 0;
        }
 
-       sender.xid = cb->args[3];
-       sender.pid = NETLINK_CB(cb->skb).pid;
-       sender.seq = cb->nlh->nlmsg_seq;
-
        osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender,
                                   &max_openflow_len);
-       if (IS_ERR(osr)) {
-               err = PTR_ERR(osr);
-               goto out;
-       }
+       if (IS_ERR(osr))
+               return PTR_ERR(osr);
        osr->type = htons(s - stats);
        osr->flags = 0;
        resize_openflow_skb(skb, &osr->header, max_openflow_len);
@@ -1465,8 +1667,6 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                err = skb->len;
        }
 
-out:
-       rcu_read_unlock();
        return err;
 }
 
@@ -1489,20 +1689,6 @@ static struct genl_ops dp_genl_ops_openflow = {
        .dumpit = dp_genl_openflow_dumpit,
 };
 
-static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
-       [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
-       [DP_GENL_A_NPACKETS] = { .type = NLA_U32 },
-       [DP_GENL_A_PSIZE] = { .type = NLA_U32 },
-};
-
-static struct genl_ops dp_genl_ops_benchmark_nl = {
-       .cmd = DP_GENL_C_BENCHMARK_NL,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-       .policy = dp_genl_benchmark_policy,
-       .doit = dp_genl_benchmark_nl,
-       .dumpit = NULL,
-};
-
 static struct genl_ops *dp_genl_all_ops[] = {
        /* Keep this operation first.  Generic Netlink dispatching
         * looks up operations with linear search, so we want it at the
@@ -1514,7 +1700,6 @@ static struct genl_ops *dp_genl_all_ops[] = {
        &dp_genl_ops_query_dp,
        &dp_genl_ops_add_port,
        &dp_genl_ops_del_port,
-       &dp_genl_ops_benchmark_nl,
 };
 
 static int dp_init_netlink(void)
@@ -1549,19 +1734,12 @@ static void dp_uninit_netlink(void)
        genl_unregister_family(&dp_genl_family);
 }
 
-#define DRV_NAME               "openflow"
-#define DRV_VERSION     VERSION
-#define DRV_DESCRIPTION "OpenFlow switching datapath implementation"
-#define DRV_COPYRIGHT   "Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"
-
-
 static int __init dp_init(void)
 {
        int err;
 
-       printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
-       printk(KERN_INFO DRV_NAME ": " VERSION" built on "__DATE__" "__TIME__"\n");
-       printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n");
+       printk("OpenFlow "VERSION", built "__DATE__" "__TIME__", "
+              "protocol 0x%02x\n", OFP_VERSION);
 
        err = flow_init();
        if (err)
@@ -1597,6 +1775,6 @@ static void dp_cleanup(void)
 module_init(dp_init);
 module_exit(dp_cleanup);
 
-MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR(DRV_COPYRIGHT);
+MODULE_DESCRIPTION("OpenFlow switching datapath");
+MODULE_AUTHOR("Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University");
 MODULE_LICENSE("GPL");