Added new interface statistics.
[sliver-openvswitch.git] / datapath / datapath.c
index c87fa24..5b59676 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/netfilter_bridge.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 
 #include "openflow-netlink.h"
 #include "datapath.h"
@@ -36,7 +37,6 @@
 #include "dp_dev.h"
 #include "forward.h"
 #include "flow.h"
-#include "datapath_t.h"
 
 #include "compat.h"
 
@@ -64,15 +64,17 @@ static struct genl_multicast_group mc_group;
 /* It's hard to imagine wanting more than one datapath, but... */
 #define DP_MAX 32
 
-/* datapaths.  Protected on the read side by rcu_read_lock, on the write side
- * by dp_mutex.
+/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
+ * by dp_mutex.  dp_mutex is almost completely redundant with genl_mutex
+ * maintained by the Generic Netlink code, but the timeout path needs mutual
+ * exclusion too.
  *
  * It is safe to access the datapath and net_bridge_port structures with just
- * the dp_mutex, but to access the chain you need to take the rcu_read_lock
- * also (because dp_mutex doesn't prevent flows from being destroyed).
+ * dp_mutex.
  */
 static struct datapath *dps[DP_MAX];
-static DEFINE_MUTEX(dp_mutex);
+DEFINE_MUTEX(dp_mutex);
+EXPORT_SYMBOL(dp_mutex);
 
 static int dp_maint_func(void *data);
 static int send_port_status(struct net_bridge_port *p, uint8_t status);
@@ -201,13 +203,9 @@ alloc_openflow_skb(struct datapath *dp, size_t openflow_len, uint8_t type,
 static int
 send_openflow_skb(struct sk_buff *skb, const struct sender *sender) 
 {
-       int err = (sender
-                  ? genlmsg_unicast(skb, sender->pid)
-                  : genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC));
-       if (err && net_ratelimit())
-               printk(KERN_WARNING "send_openflow_skb: send failed: %d\n",
-                      err);
-       return err;
+       return (sender
+               ? genlmsg_unicast(skb, sender->pid)
+               : genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC));
 }
 
 /* Generates a unique datapath id.  It incorporates the datapath index
@@ -246,9 +244,7 @@ uint64_t gen_datapath_id(uint16_t dp_idx)
 }
 
 /* Creates a new datapath numbered 'dp_idx'.  Returns 0 for success or a
- * negative error code.
- *
- * Not called with any locks. */
+ * negative error code. */
 static int new_dp(int dp_idx)
 {
        struct datapath *dp;
@@ -260,9 +256,8 @@ static int new_dp(int dp_idx)
        if (!try_module_get(THIS_MODULE))
                return -ENODEV;
 
-       mutex_lock(&dp_mutex);
-       dp = rcu_dereference(dps[dp_idx]);
-       if (dp != NULL) {
+       /* Exit early if a datapath with that number already exists. */
+       if (dps[dp_idx]) {
                err = -EEXIST;
                goto err_unlock;
        }
@@ -297,8 +292,7 @@ static int new_dp(int dp_idx)
        if (IS_ERR(dp->dp_task))
                goto err_destroy_chain;
 
-       rcu_assign_pointer(dps[dp_idx], dp);
-       mutex_unlock(&dp_mutex);
+       dps[dp_idx] = dp;
 
        return 0;
 
@@ -311,12 +305,11 @@ err_destroy_dp_dev:
 err_free_dp:
        kfree(dp);
 err_unlock:
-       mutex_unlock(&dp_mutex);
        module_put(THIS_MODULE);
                return err;
 }
 
-/* Find and return a free port number under 'dp'.  Called under dp_mutex. */
+/* Find and return a free port number under 'dp'. */
 static int find_portno(struct datapath *dp)
 {
        int i;
@@ -354,7 +347,6 @@ static struct net_bridge_port *new_nbp(struct datapath *dp,
        return p;
 }
 
-/* Called with dp_mutex. */
 int add_switch_port(struct datapath *dp, struct net_device *dev)
 {
        struct net_bridge_port *p;
@@ -378,8 +370,7 @@ int add_switch_port(struct datapath *dp, struct net_device *dev)
        return 0;
 }
 
-/* Delete 'p' from switch.
- * Called with dp_mutex. */
+/* Delete 'p' from switch. */
 static int del_switch_port(struct net_bridge_port *p)
 {
        /* First drop references to device. */
@@ -403,19 +394,26 @@ static int del_switch_port(struct net_bridge_port *p)
        return 0;
 }
 
-/* Called with dp_mutex. */
 static void del_dp(struct datapath *dp)
 {
-       struct net_bridge_port *p;
+       struct net_bridge_port *p, *n;
 
-       dp_dev_destroy(dp);
        kthread_stop(dp->dp_task);
 
        /* Drop references to DP. */
-       list_for_each_entry_rcu (p, &dp->port_list, node)
+       list_for_each_entry_safe (p, n, &dp->port_list, node)
                del_switch_port(p);
        rcu_assign_pointer(dps[dp->dp_idx], NULL);
 
+       /* Kill off local_port dev references from buffered packets that have
+        * associated dst entries. */
+       synchronize_rcu();
+       fwd_discard_all();
+
+       /* Destroy dp->netdev.  (Must follow deleting switch ports since
+        * dp->local_port has a reference to it.) */
+       dp_dev_destroy(dp);
+
        /* Wait until no longer in use, then destroy it. */
        synchronize_rcu();
        chain_destroy(dp->chain);
@@ -439,10 +437,7 @@ static void
 do_port_input(struct net_bridge_port *p, struct sk_buff *skb) 
 {
        /* Push the Ethernet header back on. */
-       if (skb->protocol == htons(ETH_P_8021Q))
-               skb_push(skb, VLAN_ETH_HLEN);
-       else
-               skb_push(skb, ETH_HLEN);
+       skb_push(skb, ETH_HLEN);
        fwd_port_input(p->dp->chain, skb, p->port_no);
 }
 
@@ -466,8 +461,6 @@ static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
 }
 #else
 /* NB: This has only been tested on 2.4.35 */
-
-/* Called without any locks (?) */
 static void dp_frame_hook(struct sk_buff *skb)
 {
        struct net_bridge_port *p = skb->dev->br_port;
@@ -537,50 +530,80 @@ int dp_set_origin(struct datapath *dp, uint16_t in_port,
        return -ENOENT;
 }
 
+static int xmit_skb(struct sk_buff *skb)
+{
+       int len = skb->len;
+       if (packet_length(skb) > skb->dev->mtu) {
+               printk("dropped over-mtu packet: %d > %d\n",
+                          packet_length(skb), skb->dev->mtu);
+               kfree_skb(skb);
+               return -E2BIG;
+       }
+
+       dev_queue_xmit(skb);
+
+       return len;
+}
+
 /* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
  */
 int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
 {
        BUG_ON(!skb);
-       if (out_port == OFPP_FLOOD)
+       switch (out_port){
+       case OFPP_IN_PORT:
+               /* Send it out the port it came in on, which is already set in
+                * the skb. */
+               if (!skb->dev) {
+                       if (net_ratelimit())
+                               printk("skb device not set forwarding to in_port\n");
+                       kfree(skb);
+                       return -ESRCH;
+               }
+               return xmit_skb(skb);
+               
+       case OFPP_TABLE: {
+               struct net_bridge_port *p = skb->dev->br_port;
+               int retval;
+               retval = run_flow_through_tables(dp->chain, skb,
+                                                p ? p->port_no : OFPP_LOCAL);
+               if (retval)
+                       kfree_skb(skb);
+               return retval;
+       }
+
+       case OFPP_FLOOD:
                return output_all(dp, skb, 1);
-       else if (out_port == OFPP_ALL)
+
+       case OFPP_ALL:
                return output_all(dp, skb, 0);
-       else if (out_port == OFPP_CONTROLLER)
+
+       case OFPP_CONTROLLER:
                return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
                                                  OFPR_ACTION);
-       else if (out_port == OFPP_TABLE) {
-               struct net_bridge_port *p = skb->dev->br_port;
-               struct sw_flow_key key;
-               struct sw_flow *flow;
-
-               flow_extract(skb, p ? p->port_no : OFPP_LOCAL, &key);
-               flow = chain_lookup(dp->chain, &key);
-               if (likely(flow != NULL)) {
-                       flow_used(flow, skb);
-                       execute_actions(dp, skb, &key, flow->actions, flow->n_actions);
-                       return 0;
-               }
-               return -ESRCH;
-       } else if (out_port == OFPP_LOCAL) {
+
+       case OFPP_LOCAL: {
                struct net_device *dev = dp->netdev;
                return dev ? dp_dev_recv(dev, skb) : -ESRCH;
-       } else if (out_port >= 0 && out_port < OFPP_MAX) {
+       }
+
+       case 0 ... OFPP_MAX-1: {
                struct net_bridge_port *p = dp->ports[out_port];
-               int len = skb->len;
                if (p == NULL)
                        goto bad_port;
-               skb->dev = p->dev; 
-               if (packet_length(skb) > skb->dev->mtu) {
-                       printk("dropped over-mtu packet: %d > %d\n",
-                              packet_length(skb), skb->dev->mtu);
+               if (p->dev == skb->dev) {
+                       /* To send to the input port, must use OFPP_IN_PORT */
                        kfree_skb(skb);
-                       return -E2BIG;
+                       if (net_ratelimit())
+                               printk("can't directly forward to input port\n");
+                       return -EINVAL;
                }
+               skb->dev = p->dev; 
+               return xmit_skb(skb);
+       }
 
-               dev_queue_xmit(skb);
-
-               return len;
+       default:
+               goto bad_port;
        }
 
 bad_port:
@@ -768,11 +791,14 @@ send_port_status(struct net_bridge_port *p, uint8_t status)
 }
 
 int 
-dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
+dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow,
+                    enum ofp_flow_expired_reason reason)
 {
        struct sk_buff *skb;
        struct ofp_flow_expired *ofe;
-       unsigned long duration_j;
+
+       if (!(dp->flags & OFPC_SEND_FLOW_EXP))
+               return 0;
 
        ofe = alloc_openflow_skb(dp, sizeof *ofe, OFPT_FLOW_EXPIRED, 0, &skb);
        if (!ofe)
@@ -780,11 +806,12 @@ dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
 
        flow_fill_match(&ofe->match, &flow->key);
 
-       memset(ofe->pad, 0, sizeof ofe->pad);
        ofe->priority = htons(flow->priority);
+       ofe->reason = reason;
+       memset(ofe->pad, 0, sizeof ofe->pad);
 
-       duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
-       ofe->duration     = htonl(duration_j / HZ);
+       ofe->duration     = htonl((jiffies - flow->init_time) / HZ);
+       memset(ofe->pad2, 0, sizeof ofe->pad2);
        ofe->packet_count = cpu_to_be64(flow->packet_count);
        ofe->byte_count   = cpu_to_be64(flow->byte_count);
 
@@ -812,6 +839,22 @@ dp_send_error_msg(struct datapath *dp, const struct sender *sender,
        return send_openflow_skb(skb, sender);
 }
 
+int
+dp_send_echo_reply(struct datapath *dp, const struct sender *sender,
+                  const struct ofp_header *rq)
+{
+       struct sk_buff *skb;
+       struct ofp_header *reply;
+
+       reply = alloc_openflow_skb(dp, ntohs(rq->length), OFPT_ECHO_REPLY,
+                                  sender, &skb);
+       if (!reply)
+               return -ENOMEM;
+
+       memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq);
+       return send_openflow_skb(skb, sender);
+}
+
 /* Generic Netlink interface.
  *
  * See netlink(7) for an introduction to netlink.  See
@@ -865,7 +908,6 @@ static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[DP_GENL_A_DP_IDX])
                return -EINVAL;
 
-       mutex_lock(&dp_mutex);
        dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
        if (!dp)
                err = -ENOENT;
@@ -873,7 +915,6 @@ static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
                del_dp(dp);
                err = 0;
        }
-       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -957,7 +998,6 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
                return -EINVAL;
 
        /* Get datapath. */
-       mutex_lock(&dp_mutex);
        dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
        if (!dp) {
                err = -ENOENT;
@@ -986,7 +1026,6 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
 out_put:
        dev_put(port);
 out:
-       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -1017,26 +1056,22 @@ static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
                return -EINVAL;
 
-       rcu_read_lock();
        dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
-       if (!dp) {
-               err = -ENOENT;
-               goto out;
-       }
+       if (!dp)
+               return -ENOENT;
 
-       if (nla_len(va) < sizeof(struct ofp_header)) {
-               err = -EINVAL;
-               goto out;
-       }
+       if (nla_len(va) < sizeof(struct ofp_header))
+               return -EINVAL;
        oh = nla_data(va);
 
        sender.xid = oh->xid;
        sender.pid = info->snd_pid;
        sender.seq = info->snd_seq;
-       err = fwd_control_input(dp->chain, &sender, nla_data(va), nla_len(va));
 
-out:
-       rcu_read_unlock();
+       mutex_lock(&dp_mutex);
+       err = fwd_control_input(dp->chain, &sender,
+                               nla_data(va), nla_len(va));
+       mutex_unlock(&dp_mutex);
        return err;
 }
 
@@ -1096,10 +1131,12 @@ static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
        ofs->match.tp_src    = flow->key.tp_src;
        ofs->match.tp_dst    = flow->key.tp_dst;
        ofs->duration        = htonl((jiffies - flow->init_time) / HZ);
+       ofs->priority        = htons(flow->priority);
+       ofs->idle_timeout    = htons(flow->idle_timeout);
+       ofs->hard_timeout    = htons(flow->hard_timeout);
+       memset(ofs->pad2, 0, sizeof ofs->pad2);
        ofs->packet_count    = cpu_to_be64(flow->packet_count);
        ofs->byte_count      = cpu_to_be64(flow->byte_count);
-       ofs->priority        = htons(flow->priority);
-       ofs->max_idle        = htons(flow->max_idle);
        memcpy(ofs->actions, flow->actions, actions_length);
 
        s->bytes_used += length;
@@ -1220,7 +1257,7 @@ static int table_stats_dump(struct datapath *dp, void *state,
                memset(ots->pad, 0, sizeof ots->pad);
                ots->max_entries = htonl(stats.max_flows);
                ots->active_count = htonl(stats.n_flows);
-               ots->matched_count = cpu_to_be64(0); /* FIXME */
+               ots->matched_count = cpu_to_be64(stats.n_matched);
        }
        return 0;
 }
@@ -1262,10 +1299,18 @@ static int port_stats_dump(struct datapath *dp, void *state,
                stats = p->dev->get_stats(p->dev);
                ops->port_no = htons(p->port_no);
                memset(ops->pad, 0, sizeof ops->pad);
-               ops->rx_count = cpu_to_be64(stats->rx_packets);
-               ops->tx_count = cpu_to_be64(stats->tx_packets);
-               ops->drop_count = cpu_to_be64(stats->rx_dropped
-                                             + stats->tx_dropped);
+               ops->rx_packets   = cpu_to_be64(stats->rx_packets);
+               ops->tx_packets   = cpu_to_be64(stats->tx_packets);
+               ops->rx_bytes     = cpu_to_be64(stats->rx_bytes);
+               ops->tx_bytes     = cpu_to_be64(stats->tx_bytes);
+               ops->rx_dropped   = cpu_to_be64(stats->rx_dropped);
+               ops->tx_dropped   = cpu_to_be64(stats->tx_dropped);
+               ops->rx_errors    = cpu_to_be64(stats->rx_errors);
+               ops->tx_errors    = cpu_to_be64(stats->tx_errors);
+               ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors);
+               ops->rx_over_err  = cpu_to_be64(stats->rx_over_errors);
+               ops->rx_crc_err   = cpu_to_be64(stats->rx_crc_errors);
+               ops->collisions   = cpu_to_be64(stats->collisions);
                n_ports++;
                ops++;
        }
@@ -1352,7 +1397,6 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
         * struct genl_ops.  This kluge supports earlier versions also. */
        cb->done = dp_genl_openflow_done;
 
-       rcu_read_lock();
        if (!cb->args[0]) {
                struct nlattr *attrs[DP_GENL_A_MAX + 1];
                struct ofp_stats_request *rq;
@@ -1365,21 +1409,17 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                if (err < 0)
                        return err;
 
-               err = -EINVAL;
-
                if (!attrs[DP_GENL_A_DP_IDX])
-                       goto out;
+                       return -EINVAL;
                dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]);
                dp = dp_get(dp_idx);
-               if (!dp) {
-                       err = -ENOENT;
-                       goto out;
-               }
+               if (!dp)
+                       return -ENOENT;
 
                va = attrs[DP_GENL_A_OPENFLOW];
                len = nla_len(va);
                if (!va || len < sizeof *rq)
-                       goto out;
+                       return -EINVAL;
 
                rq = nla_data(va);
                type = ntohs(rq->type);
@@ -1388,12 +1428,12 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                    || ntohs(rq->header.length) != len
                    || type >= ARRAY_SIZE(stats)
                    || !stats[type].dump)
-                       goto out;
+                       return -EINVAL;
 
                s = &stats[type];
                body_len = len - offsetof(struct ofp_stats_request, body);
                if (body_len < s->min_body || body_len > s->max_body)
-                       goto out;
+                       return -EINVAL;
 
                cb->args[0] = 1;
                cb->args[1] = dp_idx;
@@ -1403,7 +1443,7 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                        void *state;
                        err = s->init(dp, rq->body, body_len, &state);
                        if (err)
-                               goto out;
+                               return err;
                        cb->args[4] = (long) state;
                }
        } else if (cb->args[0] == 1) {
@@ -1411,13 +1451,10 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                s = &stats[cb->args[2]];
 
                dp = dp_get(dp_idx);
-               if (!dp) {
-                       err = -ENOENT;
-                       goto out;
-               }
+               if (!dp)
+                       return -ENOENT;
        } else {
-               err = 0;
-               goto out;
+               return 0;
        }
 
        sender.xid = cb->args[3];
@@ -1426,10 +1463,8 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 
        osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender,
                                   &max_openflow_len);
-       if (IS_ERR(osr)) {
-               err = PTR_ERR(osr);
-               goto out;
-       }
+       if (IS_ERR(osr))
+               return PTR_ERR(osr);
        osr->type = htons(s - stats);
        osr->flags = 0;
        resize_openflow_skb(skb, &osr->header, max_openflow_len);
@@ -1448,8 +1483,6 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                err = skb->len;
        }
 
-out:
-       rcu_read_unlock();
        return err;
 }
 
@@ -1472,20 +1505,6 @@ static struct genl_ops dp_genl_ops_openflow = {
        .dumpit = dp_genl_openflow_dumpit,
 };
 
-static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
-       [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
-       [DP_GENL_A_NPACKETS] = { .type = NLA_U32 },
-       [DP_GENL_A_PSIZE] = { .type = NLA_U32 },
-};
-
-static struct genl_ops dp_genl_ops_benchmark_nl = {
-       .cmd = DP_GENL_C_BENCHMARK_NL,
-       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-       .policy = dp_genl_benchmark_policy,
-       .doit = dp_genl_benchmark_nl,
-       .dumpit = NULL,
-};
-
 static struct genl_ops *dp_genl_all_ops[] = {
        /* Keep this operation first.  Generic Netlink dispatching
         * looks up operations with linear search, so we want it at the
@@ -1497,7 +1516,6 @@ static struct genl_ops *dp_genl_all_ops[] = {
        &dp_genl_ops_query_dp,
        &dp_genl_ops_add_port,
        &dp_genl_ops_del_port,
-       &dp_genl_ops_benchmark_nl,
 };
 
 static int dp_init_netlink(void)