#include <linux/netfilter_bridge.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
+#include <linux/rculist.h>
#include "openflow-netlink.h"
#include "datapath.h"
#include "dp_dev.h"
#include "forward.h"
#include "flow.h"
-#include "datapath_t.h"
#include "compat.h"
/* It's hard to imagine wanting more than one datapath, but... */
#define DP_MAX 32
-/* datapaths. Protected on the read side by rcu_read_lock, on the write side
- * by dp_mutex.
+/* Datapaths. Protected on the read side by rcu_read_lock, on the write side
+ * by dp_mutex. dp_mutex is almost completely redundant with genl_mutex
+ * maintained by the Generic Netlink code, but the timeout path needs mutual
+ * exclusion too.
*
* It is safe to access the datapath and net_bridge_port structures with just
- * the dp_mutex, but to access the chain you need to take the rcu_read_lock
- * also (because dp_mutex doesn't prevent flows from being destroyed).
+ * dp_mutex.
*/
static struct datapath *dps[DP_MAX];
-static DEFINE_MUTEX(dp_mutex);
+DEFINE_MUTEX(dp_mutex);
+EXPORT_SYMBOL(dp_mutex);
static int dp_maint_func(void *data);
static int send_port_status(struct net_bridge_port *p, uint8_t status);
static int
send_openflow_skb(struct sk_buff *skb, const struct sender *sender)
{
-	int err = (sender
-		   ? genlmsg_unicast(skb, sender->pid)
-		   : genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC));
-	if (err && net_ratelimit())
-		printk(KERN_WARNING "send_openflow_skb: send failed: %d\n",
-		       err);
-	return err;
+	/* Unicast the reply to the requester when 'sender' is nonnull,
+	 * otherwise multicast to the OpenFlow group.  Error logging is
+	 * dropped here; callers now see the raw genlmsg_* return value
+	 * and decide how to report it. */
+	return (sender
+		? genlmsg_unicast(skb, sender->pid)
+		: genlmsg_multicast(skb, 0, mc_group.id, GFP_ATOMIC));
}
/* Generates a unique datapath id. It incorporates the datapath index
}
/* Creates a new datapath numbered 'dp_idx'. Returns 0 for success or a
- * negative error code.
- *
- * Not called with any locks. */
+ * negative error code. */
static int new_dp(int dp_idx)
{
struct datapath *dp;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- mutex_lock(&dp_mutex);
- dp = rcu_dereference(dps[dp_idx]);
- if (dp != NULL) {
+ /* Exit early if a datapath with that number already exists. */
+ if (dps[dp_idx]) {
err = -EEXIST;
goto err_unlock;
}
if (IS_ERR(dp->dp_task))
goto err_destroy_chain;
- rcu_assign_pointer(dps[dp_idx], dp);
- mutex_unlock(&dp_mutex);
+ dps[dp_idx] = dp;
return 0;
err_free_dp:
kfree(dp);
err_unlock:
- mutex_unlock(&dp_mutex);
module_put(THIS_MODULE);
return err;
}
-/* Find and return a free port number under 'dp'. Called under dp_mutex. */
+/* Find and return a free port number under 'dp'. */
static int find_portno(struct datapath *dp)
{
int i;
return p;
}
-/* Called with dp_mutex. */
int add_switch_port(struct datapath *dp, struct net_device *dev)
{
struct net_bridge_port *p;
return 0;
}
-/* Delete 'p' from switch.
- * Called with dp_mutex. */
+/* Delete 'p' from switch. */
static int del_switch_port(struct net_bridge_port *p)
{
/* First drop references to device. */
return 0;
}
-/* Called with dp_mutex. */
static void del_dp(struct datapath *dp)
{
- struct net_bridge_port *p;
+ struct net_bridge_port *p, *n;
- dp_dev_destroy(dp);
kthread_stop(dp->dp_task);
/* Drop references to DP. */
- list_for_each_entry_rcu (p, &dp->port_list, node)
+ list_for_each_entry_safe (p, n, &dp->port_list, node)
del_switch_port(p);
rcu_assign_pointer(dps[dp->dp_idx], NULL);
+ /* Kill off local_port dev references from buffered packets that have
+ * associated dst entries. */
+ synchronize_rcu();
+ fwd_discard_all();
+
+ /* Destroy dp->netdev. (Must follow deleting switch ports since
+ * dp->local_port has a reference to it.) */
+ dp_dev_destroy(dp);
+
/* Wait until no longer in use, then destroy it. */
synchronize_rcu();
chain_destroy(dp->chain);
do_port_input(struct net_bridge_port *p, struct sk_buff *skb)
{
/* Push the Ethernet header back on. */
- if (skb->protocol == htons(ETH_P_8021Q))
- skb_push(skb, VLAN_ETH_HLEN);
- else
- skb_push(skb, ETH_HLEN);
+ skb_push(skb, ETH_HLEN);
fwd_port_input(p->dp->chain, skb, p->port_no);
}
}
#else
/* NB: This has only been tested on 2.4.35 */
-
-/* Called without any locks (?) */
static void dp_frame_hook(struct sk_buff *skb)
{
struct net_bridge_port *p = skb->dev->br_port;
return -ENOENT;
}
+/* Consumes 'skb': drops it when it exceeds the output device's MTU,
+ * otherwise queues it for transmission on skb->dev (which the caller
+ * must already have set).  Returns the transmitted byte count, or
+ * -E2BIG on an over-MTU drop.  'len' is captured before
+ * dev_queue_xmit() because that call may free the skb. */
+static int xmit_skb(struct sk_buff *skb)
+{
+	int len = skb->len;
+	if (packet_length(skb) > skb->dev->mtu) {
+		printk("dropped over-mtu packet: %d > %d\n",
+		       packet_length(skb), skb->dev->mtu);
+		kfree_skb(skb);
+		return -E2BIG;
+	}
+
+	dev_queue_xmit(skb);
+
+	return len;
+}
+
/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
*/
int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	BUG_ON(!skb);
-	if (out_port == OFPP_FLOOD)
+	/* Dispatch on the reserved output-port codes first, then on
+	 * ordinary port numbers in [0, OFPP_MAX). */
+	switch (out_port){
+	case OFPP_IN_PORT:
+		/* Send it out the port it came in on, which is already set in
+		 * the skb. */
+		if (!skb->dev) {
+			if (net_ratelimit())
+				printk("skb device not set forwarding to in_port\n");
+			/* Must free with kfree_skb(), not kfree(): an skb
+			 * carries its own data buffer and destructor. */
+			kfree_skb(skb);
+			return -ESRCH;
+		}
+		return xmit_skb(skb);
+
+	case OFPP_TABLE: {
+		struct net_bridge_port *p = skb->dev->br_port;
+		int retval;
+		retval = run_flow_through_tables(dp->chain, skb,
+						 p ? p->port_no : OFPP_LOCAL);
+		if (retval)
+			kfree_skb(skb);
+		return retval;
+	}
+
+	case OFPP_FLOOD:
		return output_all(dp, skb, 1);
-	else if (out_port == OFPP_ALL)
+
+	case OFPP_ALL:
		return output_all(dp, skb, 0);
-	else if (out_port == OFPP_CONTROLLER)
+
+	case OFPP_CONTROLLER:
		return dp_output_control(dp, skb, fwd_save_skb(skb), 0,
					 OFPR_ACTION);
-	else if (out_port == OFPP_TABLE) {
-		struct net_bridge_port *p = skb->dev->br_port;
-		struct sw_flow_key key;
-		struct sw_flow *flow;
-
-		flow_extract(skb, p ? p->port_no : OFPP_LOCAL, &key);
-		flow = chain_lookup(dp->chain, &key);
-		if (likely(flow != NULL)) {
-			flow_used(flow, skb);
-			execute_actions(dp, skb, &key, flow->actions, flow->n_actions);
-			return 0;
-		}
-		return -ESRCH;
-	} else if (out_port == OFPP_LOCAL) {
+
+	case OFPP_LOCAL: {
		struct net_device *dev = dp->netdev;
		return dev ? dp_dev_recv(dev, skb) : -ESRCH;
-	} else if (out_port >= 0 && out_port < OFPP_MAX) {
+	}
+
+	case 0 ... OFPP_MAX-1: {
		struct net_bridge_port *p = dp->ports[out_port];
-		int len = skb->len;
		if (p == NULL)
			goto bad_port;
-		skb->dev = p->dev;
-		if (packet_length(skb) > skb->dev->mtu) {
-			printk("dropped over-mtu packet: %d > %d\n",
-			       packet_length(skb), skb->dev->mtu);
+		if (p->dev == skb->dev) {
+			/* To send to the input port, must use OFPP_IN_PORT */
			kfree_skb(skb);
-			return -E2BIG;
+			if (net_ratelimit())
+				printk("can't directly forward to input port\n");
+			return -EINVAL;
		}
+		skb->dev = p->dev;
+		return xmit_skb(skb);
+	}

-		dev_queue_xmit(skb);
-
-		return len;
+	default:
+		goto bad_port;
	}

bad_port:
}
int
-dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow)
+dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow,
+ enum ofp_flow_expired_reason reason)
{
struct sk_buff *skb;
struct ofp_flow_expired *ofe;
- unsigned long duration_j;
+
+ if (!(dp->flags & OFPC_SEND_FLOW_EXP))
+ return 0;
ofe = alloc_openflow_skb(dp, sizeof *ofe, OFPT_FLOW_EXPIRED, 0, &skb);
if (!ofe)
flow_fill_match(&ofe->match, &flow->key);
- memset(ofe->pad, 0, sizeof ofe->pad);
ofe->priority = htons(flow->priority);
+ ofe->reason = reason;
+ memset(ofe->pad, 0, sizeof ofe->pad);
- duration_j = (flow->timeout - HZ * flow->max_idle) - flow->init_time;
- ofe->duration = htonl(duration_j / HZ);
+ ofe->duration = htonl((jiffies - flow->init_time) / HZ);
+ memset(ofe->pad2, 0, sizeof ofe->pad2);
ofe->packet_count = cpu_to_be64(flow->packet_count);
ofe->byte_count = cpu_to_be64(flow->byte_count);
return send_openflow_skb(skb, sender);
}
+/* Sends an OFPT_ECHO_REPLY to 'sender' carrying a verbatim copy of the
+ * body of echo request 'rq'.  Returns 0 on success or a negative errno
+ * value (the skb allocator's or netlink's).
+ *
+ * NOTE(review): the memcpy size ntohs(rq->length) - sizeof *rq assumes
+ * rq->length >= sizeof *rq; an undersized length would wrap the size.
+ * Presumably the control-input path validates the header length before
+ * dispatching here -- confirm in fwd_control_input. */
+int
+dp_send_echo_reply(struct datapath *dp, const struct sender *sender,
+		   const struct ofp_header *rq)
+{
+	struct sk_buff *skb;
+	struct ofp_header *reply;
+
+	reply = alloc_openflow_skb(dp, ntohs(rq->length), OFPT_ECHO_REPLY,
+				   sender, &skb);
+	if (!reply)
+		return -ENOMEM;
+
+	memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq);
+	return send_openflow_skb(skb, sender);
+}
+
/* Generic Netlink interface.
*
* See netlink(7) for an introduction to netlink. See
if (!info->attrs[DP_GENL_A_DP_IDX])
return -EINVAL;
- mutex_lock(&dp_mutex);
dp = dp_get(nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])));
if (!dp)
err = -ENOENT;
del_dp(dp);
err = 0;
}
- mutex_unlock(&dp_mutex);
return err;
}
return -EINVAL;
/* Get datapath. */
- mutex_lock(&dp_mutex);
dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
if (!dp) {
err = -ENOENT;
out_put:
dev_put(port);
out:
- mutex_unlock(&dp_mutex);
return err;
}
if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
return -EINVAL;
- rcu_read_lock();
dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
- if (!dp) {
- err = -ENOENT;
- goto out;
- }
+ if (!dp)
+ return -ENOENT;
- if (nla_len(va) < sizeof(struct ofp_header)) {
- err = -EINVAL;
- goto out;
- }
+ if (nla_len(va) < sizeof(struct ofp_header))
+ return -EINVAL;
oh = nla_data(va);
sender.xid = oh->xid;
sender.pid = info->snd_pid;
sender.seq = info->snd_seq;
- err = fwd_control_input(dp->chain, &sender, nla_data(va), nla_len(va));
-out:
- rcu_read_unlock();
+ mutex_lock(&dp_mutex);
+ err = fwd_control_input(dp->chain, &sender,
+ nla_data(va), nla_len(va));
+ mutex_unlock(&dp_mutex);
return err;
}
ofs->match.tp_src = flow->key.tp_src;
ofs->match.tp_dst = flow->key.tp_dst;
ofs->duration = htonl((jiffies - flow->init_time) / HZ);
+ ofs->priority = htons(flow->priority);
+ ofs->idle_timeout = htons(flow->idle_timeout);
+ ofs->hard_timeout = htons(flow->hard_timeout);
+ memset(ofs->pad2, 0, sizeof ofs->pad2);
ofs->packet_count = cpu_to_be64(flow->packet_count);
ofs->byte_count = cpu_to_be64(flow->byte_count);
- ofs->priority = htons(flow->priority);
- ofs->max_idle = htons(flow->max_idle);
memcpy(ofs->actions, flow->actions, actions_length);
s->bytes_used += length;
memset(ots->pad, 0, sizeof ots->pad);
ots->max_entries = htonl(stats.max_flows);
ots->active_count = htonl(stats.n_flows);
- ots->matched_count = cpu_to_be64(0); /* FIXME */
+ ots->matched_count = cpu_to_be64(stats.n_matched);
}
return 0;
}
stats = p->dev->get_stats(p->dev);
ops->port_no = htons(p->port_no);
memset(ops->pad, 0, sizeof ops->pad);
- ops->rx_count = cpu_to_be64(stats->rx_packets);
- ops->tx_count = cpu_to_be64(stats->tx_packets);
- ops->drop_count = cpu_to_be64(stats->rx_dropped
- + stats->tx_dropped);
+ ops->rx_packets = cpu_to_be64(stats->rx_packets);
+ ops->tx_packets = cpu_to_be64(stats->tx_packets);
+ ops->rx_bytes = cpu_to_be64(stats->rx_bytes);
+ ops->tx_bytes = cpu_to_be64(stats->tx_bytes);
+ ops->rx_dropped = cpu_to_be64(stats->rx_dropped);
+ ops->tx_dropped = cpu_to_be64(stats->tx_dropped);
+ ops->rx_errors = cpu_to_be64(stats->rx_errors);
+ ops->tx_errors = cpu_to_be64(stats->tx_errors);
+ ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors);
+ ops->rx_over_err = cpu_to_be64(stats->rx_over_errors);
+ ops->rx_crc_err = cpu_to_be64(stats->rx_crc_errors);
+ ops->collisions = cpu_to_be64(stats->collisions);
n_ports++;
ops++;
}
* struct genl_ops. This kluge supports earlier versions also. */
cb->done = dp_genl_openflow_done;
- rcu_read_lock();
if (!cb->args[0]) {
struct nlattr *attrs[DP_GENL_A_MAX + 1];
struct ofp_stats_request *rq;
if (err < 0)
return err;
- err = -EINVAL;
-
if (!attrs[DP_GENL_A_DP_IDX])
- goto out;
+ return -EINVAL;
dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]);
dp = dp_get(dp_idx);
- if (!dp) {
- err = -ENOENT;
- goto out;
- }
+ if (!dp)
+ return -ENOENT;
va = attrs[DP_GENL_A_OPENFLOW];
len = nla_len(va);
if (!va || len < sizeof *rq)
- goto out;
+ return -EINVAL;
rq = nla_data(va);
type = ntohs(rq->type);
|| ntohs(rq->header.length) != len
|| type >= ARRAY_SIZE(stats)
|| !stats[type].dump)
- goto out;
+ return -EINVAL;
s = &stats[type];
body_len = len - offsetof(struct ofp_stats_request, body);
if (body_len < s->min_body || body_len > s->max_body)
- goto out;
+ return -EINVAL;
cb->args[0] = 1;
cb->args[1] = dp_idx;
void *state;
err = s->init(dp, rq->body, body_len, &state);
if (err)
- goto out;
+ return err;
cb->args[4] = (long) state;
}
} else if (cb->args[0] == 1) {
s = &stats[cb->args[2]];
dp = dp_get(dp_idx);
- if (!dp) {
- err = -ENOENT;
- goto out;
- }
+ if (!dp)
+ return -ENOENT;
} else {
- err = 0;
- goto out;
+ return 0;
}
sender.xid = cb->args[3];
osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender,
&max_openflow_len);
- if (IS_ERR(osr)) {
- err = PTR_ERR(osr);
- goto out;
- }
+ if (IS_ERR(osr))
+ return PTR_ERR(osr);
osr->type = htons(s - stats);
osr->flags = 0;
resize_openflow_skb(skb, &osr->header, max_openflow_len);
err = skb->len;
}
-out:
- rcu_read_unlock();
return err;
}
.dumpit = dp_genl_openflow_dumpit,
};
-static struct nla_policy dp_genl_benchmark_policy[DP_GENL_A_MAX + 1] = {
- [DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
- [DP_GENL_A_NPACKETS] = { .type = NLA_U32 },
- [DP_GENL_A_PSIZE] = { .type = NLA_U32 },
-};
-
-static struct genl_ops dp_genl_ops_benchmark_nl = {
- .cmd = DP_GENL_C_BENCHMARK_NL,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
- .policy = dp_genl_benchmark_policy,
- .doit = dp_genl_benchmark_nl,
- .dumpit = NULL,
-};
-
static struct genl_ops *dp_genl_all_ops[] = {
/* Keep this operation first. Generic Netlink dispatching
* looks up operations with linear search, so we want it at the
&dp_genl_ops_query_dp,
&dp_genl_ops_add_port,
&dp_genl_ops_del_port,
- &dp_genl_ops_benchmark_nl,
};
static int dp_init_netlink(void)