actions.h \
compat.h \
datapath.h \
- dp_dev.h \
dp_sysfs.h \
- flow.h
+ flow.h \
- odp-compat.h \
+ table.h \
+ vport.h \
+ vport-internal_dev.h \
- vport-netdev.h
++ vport-netdev.h \
++ xflow-compat.h
dist_sources = $(foreach module,$(dist_modules),$($(module)_sources))
dist_headers = $(foreach module,$(dist_modules),$($(module)_headers))
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/checksum.h>
- #include "datapath.h"
- #include "dp_dev.h"
+
#include "actions.h"
-#include "openvswitch/datapath-protocol.h"
+ #include "datapath.h"
+#include "openvswitch/xflow.h"
+ #include "vport.h"
static struct sk_buff *
make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp)
return NULL;
}
-static void set_tunnel(struct sk_buff *skb, struct odp_flow_key *key,
++static void set_tunnel(struct sk_buff *skb, struct xflow_key *key,
+ __be32 tun_id)
+ {
+ OVS_CB(skb)->tun_id = key->tun_id = tun_id;
+ }
static struct sk_buff *
vlan_pull_tag(struct sk_buff *skb)
/* Send a copy of this packet up to the sFlow agent, along with extra
* information about what happened to it. */
static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
- const union odp_action *a, int n_actions,
+ const union xflow_action *a, int n_actions,
- gfp_t gfp, struct net_bridge_port *nbp)
+ gfp_t gfp, struct dp_port *dp_port)
{
- struct odp_sflow_sample_header *hdr;
- unsigned int actlen = n_actions * sizeof(union odp_action);
- unsigned int hdrlen = sizeof(struct odp_sflow_sample_header);
+ struct xflow_sflow_sample_header *hdr;
+ unsigned int actlen = n_actions * sizeof(union xflow_action);
+ unsigned int hdrlen = sizeof(struct xflow_sflow_sample_header);
struct sk_buff *nskb;
nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp);
return;
memcpy(__skb_push(nskb, actlen), a, actlen);
- hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen);
+ hdr = (struct xflow_sflow_sample_header*)__skb_push(nskb, hdrlen);
hdr->n_actions = n_actions;
- hdr->sample_pool = atomic_read(&nbp->sflow_pool);
+ hdr->sample_pool = atomic_read(&dp_port->sflow_pool);
- dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0);
+ dp_output_control(dp, nskb, _XFLOWL_SFLOW_NR, 0);
}
/* Execute a list of actions against 'skb'. */
}
break;
- case ODPAT_SET_TUNNEL:
++ case XFLOWAT_SET_TUNNEL:
+ set_tunnel(skb, key, a->tunnel.tun_id);
+ break;
+
- case ODPAT_SET_VLAN_VID:
- case ODPAT_SET_VLAN_PCP:
+ case XFLOWAT_SET_DL_TCI:
skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp);
if (IS_ERR(skb))
return PTR_ERR(skb);
struct datapath;
struct sk_buff;
-struct odp_flow_key;
-union odp_action;
+struct xflow_key;
+union xflow_action;
- int dp_xmit_skb(struct sk_buff *);
int execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct odp_flow_key *key,
- const union odp_action *, int n_actions,
+ struct xflow_key *key,
+ const union xflow_action *, int n_actions,
gfp_t gfp);
+ static inline void
+ set_skb_csum_bits(const struct sk_buff *old_skb, struct sk_buff *new_skb)
+ {
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
+ /* Before 2.6.24 these fields were not copied when
+ * doing an skb_copy_expand. */
+ new_skb->ip_summed = old_skb->ip_summed;
+ new_skb->csum = old_skb->csum;
+ #endif
+ #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
+ /* These fields are copied in skb_clone but not in
+ * skb_copy or related functions. We need to manually
+ * copy them over here. */
+ new_skb->proto_data_valid = old_skb->proto_data_valid;
+ new_skb->proto_csum_blank = old_skb->proto_csum_blank;
+ #endif
+ }
+
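A hedged usage sketch, not part of the patch: a caller would typically pair this helper with an skb copy. The function name below is invented for illustration.

/* Illustrative only: copy an skb with extra headroom, carrying the
 * checksum state across on kernels where the copy does not do so. */
static struct sk_buff *copy_skb_preserving_csum(struct sk_buff *skb,
						unsigned int headroom,
						gfp_t gfp)
{
	struct sk_buff *nskb = skb_copy_expand(skb, headroom, 0, gfp);

	if (nskb)
		set_skb_csum_bits(skb, nskb);
	return nskb;
}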
#endif /* actions.h */
#include <linux/workqueue.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
- #include <net/llc.h>
+ #include <linux/compat.h>
-#include "openvswitch/datapath-protocol.h"
+#include "openvswitch/xflow.h"
#include "datapath.h"
#include "actions.h"
- #include "dp_dev.h"
#include "flow.h"
-#include "odp-compat.h"
++#include "xflow-compat.h"
+ #include "table.h"
+ #include "vport-internal_dev.h"
#include "compat.h"
* dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
* lock first.
*
- * It is safe to access the datapath and net_bridge_port structures with just
+ * It is safe to access the datapath and dp_port structures with just
* dp_mutex.
*/
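A minimal sketch of the ordering documented above, assuming only the standard rtnl_lock()/rtnl_unlock() API; the wrapper function is invented, not code from this patch.

/* Sketch: RTNL first, then dp_mutex, since dp_mutex nests inside it. */
static void with_dp_locked(struct datapath *dp, void (*body)(struct datapath *))
{
	rtnl_lock();
	mutex_lock(&dp_mutex);
	body(dp);		/* datapath and dp_port structures are safe here */
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
}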
-static struct datapath *dps[ODP_MAX];
+static struct datapath *dps[XFLOW_MAX];
static DEFINE_MUTEX(dp_mutex);
/* Number of milliseconds between runs of the maintenance thread. */
#define MAINT_SLEEP_MSECS 1000
- static int new_nbp(struct datapath *, struct net_device *, int port_no);
-static int new_dp_port(struct datapath *, struct odp_port *, int port_no);
++static int new_dp_port(struct datapath *, struct xflow_port *, int port_no);
/* Must be called with rcu_read_lock or dp_mutex. */
struct datapath *get_dp(int dp_idx)
return dp;
}
- return vport_get_name(dp->ports[ODPP_LOCAL]->vport);
+ /* Must be called with rcu_read_lock or RTNL lock. */
+ const char *dp_name(const struct datapath *dp)
+ {
++ return vport_get_name(dp->ports[XFLOWP_LOCAL]->vport);
+ }
+
static inline size_t br_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
hdr = nlmsg_data(nlh);
hdr->ifi_family = AF_BRIDGE;
hdr->__ifi_pad = 0;
- hdr->ifi_type = dev->type;
- hdr->ifi_index = dev->ifindex;
- hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_type = ARPHRD_ETHER;
+ hdr->ifi_index = ifindex;
+ hdr->ifi_flags = vport_get_flags(port->vport);
hdr->ifi_change = 0;
- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
- NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[XFLOWP_LOCAL]->dev->ifindex);
- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port->vport));
- NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[ODPP_LOCAL]->vport));
++ NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[XFLOWP_LOCAL]->vport));
+ NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port->vport));
#ifdef IFLA_OPERSTATE
NLA_PUT_U8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+ vport_is_running(port->vport)
+ ? vport_get_operstate(port->vport)
+ : IF_OPER_DOWN);
#endif
- if (dev->addr_len)
- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN,
+ vport_get_addr(port->vport));
- if (dev->ifindex != dev->iflink)
- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+ if (ifindex != iflink)
+ NLA_PUT_U32(skb, IFLA_LINK, iflink);
return nlmsg_end(skb, nlh);
static int create_dp(int dp_idx, const char __user *devnamep)
{
- struct net_device *dp_dev;
- struct odp_port internal_dev_port;
++ struct xflow_port internal_dev_port;
char devname[IFNAMSIZ];
struct datapath *dp;
int err;
goto err_free_dp;
/* Set up our datapath device. */
- dp_dev = dp_dev_create(dp, devname, XFLOWP_LOCAL);
- err = PTR_ERR(dp_dev);
- if (IS_ERR(dp_dev))
- goto err_destroy_table;
-
- err = new_nbp(dp, dp_dev, XFLOWP_LOCAL);
+ BUILD_BUG_ON(sizeof(internal_dev_port.devname) != sizeof(devname));
+ strcpy(internal_dev_port.devname, devname);
- internal_dev_port.flags = ODP_PORT_INTERNAL;
- err = new_dp_port(dp, &internal_dev_port, ODPP_LOCAL);
++ internal_dev_port.flags = XFLOW_PORT_INTERNAL;
++ err = new_dp_port(dp, &internal_dev_port, XFLOWP_LOCAL);
if (err) {
- dp_dev_destroy(dp_dev);
+ if (err == -EBUSY)
+ err = -EEXIST;
+
goto err_destroy_table;
}
return 0;
err_destroy_local_port:
- dp_del_port(dp->ports[XFLOWP_LOCAL]);
- dp_detach_port(dp->ports[ODPP_LOCAL], 1);
++ dp_detach_port(dp->ports[XFLOWP_LOCAL], 1);
err_destroy_table:
- dp_table_destroy(dp->table, 0);
+ tbl_destroy(dp->table, NULL);
err_free_dp:
kfree(dp);
err_put_module:
int i;
list_for_each_entry_safe (p, n, &dp->port_list, node)
- if (p->port_no != ODPP_LOCAL)
+ if (p->port_no != XFLOWP_LOCAL)
- dp_del_port(p);
+ dp_detach_port(p, 1);
dp_sysfs_del_dp(dp);
rcu_assign_pointer(dps[dp->dp_idx], NULL);
- dp_del_port(dp->ports[XFLOWP_LOCAL]);
- dp_detach_port(dp->ports[ODPP_LOCAL], 1);
++ dp_detach_port(dp->ports[XFLOWP_LOCAL], 1);
- dp_table_destroy(dp->table, 1);
+ tbl_destroy(dp->table, flow_free_tbl);
for (i = 0; i < DP_N_QUEUES; i++)
skb_queue_purge(&dp->queues[i]);
};
/* Called with RTNL lock and dp_mutex. */
- static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
-static int new_dp_port(struct datapath *dp, struct odp_port *odp_port, int port_no)
++static int new_dp_port(struct datapath *dp, struct xflow_port *xflow_port, int port_no)
{
- struct net_bridge_port *p;
+ struct vport *vport;
+ struct dp_port *p;
+ int err;
+
- vport = vport_locate(odp_port->devname);
++ vport = vport_locate(xflow_port->devname);
+ if (!vport) {
+ vport_lock();
+
- if (odp_port->flags & ODP_PORT_INTERNAL)
- vport = __vport_add(odp_port->devname, "internal", NULL);
++ if (xflow_port->flags & XFLOW_PORT_INTERNAL)
++ vport = __vport_add(xflow_port->devname, "internal", NULL);
+ else
- vport = __vport_add(odp_port->devname, "netdev", NULL);
++ vport = __vport_add(xflow_port->devname, "netdev", NULL);
- if (dev->br_port != NULL)
- return -EBUSY;
+ vport_unlock();
+
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+ }
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return 0;
}
- static int add_port(int dp_idx, struct xflow_port __user *portp)
-static int attach_port(int dp_idx, struct odp_port __user *portp)
++static int attach_port(int dp_idx, struct xflow_port __user *portp)
{
- struct net_device *dev;
struct datapath *dp;
- struct odp_port port;
+ struct xflow_port port;
int port_no;
int err;
return err;
}
- int dp_del_port(struct net_bridge_port *p)
+ int dp_detach_port(struct dp_port *p, int may_delete)
{
+ struct vport *vport = p->vport;
+ int err;
+
ASSERT_RTNL();
- if (p->port_no != ODPP_LOCAL)
+ if (p->port_no != XFLOWP_LOCAL)
dp_sysfs_del_if(p);
dp_ifinfo_notify(RTM_DELLINK, p);
{
struct datapath *dp = p->dp;
struct dp_stats_percpu *stats;
- struct odp_flow_key key;
+ int stats_counter_off;
- struct sw_flow *flow;
+ struct xflow_key key;
+ struct tbl_node *flow_node;
WARN_ON_ONCE(skb_shared(skb));
+ skb_warn_if_lro(skb);
- compute_ip_summed(skb, false);
-
- /* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
- stats = percpu_ptr(dp->stats_percpu, smp_processor_id());
+ OVS_CB(skb)->dp_port = p;
- if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
+ if (flow_extract(skb, p ? p->port_no : XFLOWP_NONE, &key)) {
if (dp->drop_frags) {
kfree_skb(skb);
- stats->n_frags++;
- return;
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+ goto out;
}
}
flow_used(flow, skb);
execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
GFP_ATOMIC);
- stats->n_hit++;
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
} else {
- stats->n_missed++;
- dp_output_control(dp, skb, _XFLOWL_MISS_NR, 0);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
- dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
++ dp_output_control(dp, skb, _XFLOWL_MISS_NR, OVS_CB(skb)->tun_id);
}
- }
- /*
- * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on
- * different set of devices!)
- */
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
- /* Called with rcu_read_lock and bottom-halves disabled. */
- static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
- struct sk_buff *skb)
- {
- do_port_input(p, skb);
- return NULL;
- }
- #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- /* Called with rcu_read_lock and bottom-halves disabled. */
- static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
- {
- do_port_input(p, *pskb);
- return 1;
+ out:
+ local_bh_disable();
+ stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+ (*(u64 *)((u8 *)stats + stats_counter_off))++;
+ local_bh_enable();
}
- #else
- #error
- #endif
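The per-CPU statistics update above uses an offsetof()-based idiom; here is a self-contained restatement of the same technique (the struct name is invented, but the counter fields match the ones used in this hunk).

/* Sketch: each code path records which counter to bump as a byte offset,
 * and a single exit path performs the increment. */
struct example_stats { u64 n_hit, n_missed, n_frags; };

static void bump_counter(struct example_stats *stats, size_t counter_off)
{
	(*(u64 *)((u8 *)stats + counter_off))++;
}
/* e.g.: bump_counter(stats, offsetof(struct example_stats, n_missed)); */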
#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
- /* This code is based on a skb_checksum_setup from net/dev/core.c from a
- * combination of Lenny's 2.6.26 Xen kernel and Xen's
- * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call this function
- * directly because it isn't exported in all versions. */
- static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
- {
- if (ptr < (void *)skb->tail)
- return 1;
- if (__pskb_pull_tail(skb,
- ptr - (void *)skb->data - skb_headlen(skb))) {
- return 1;
- } else {
- return 0;
- }
- }
-
+ /* This code is based on skb_checksum_setup() from Xen's net/dev/core.c. We
+ * can't call this function directly because it isn't exported in all
+ * versions. */
int vswitch_skb_checksum_setup(struct sk_buff *skb)
{
struct iphdr *iph;
int port_no;
int err;
- port_no = XFLOWP_LOCAL;
- if (skb->dev) {
- if (skb->dev->br_port)
- port_no = skb->dev->br_port->port_no;
- else if (is_dp_dev(skb->dev))
- port_no = dp_dev_priv(skb->dev)->port_no;
- }
+ if (OVS_CB(skb)->dp_port)
+ port_no = OVS_CB(skb)->dp_port->port_no;
+ else
- port_no = ODPP_LOCAL;
++ port_no = XFLOWP_LOCAL;
do {
- struct odp_msg *header;
+ struct xflow_msg *header;
nskb = skb->next;
skb->next = NULL;
flow->byte_count = 0;
}
- static int put_flow(struct datapath *dp, struct xflow_flow_put __user *ufp)
+ static int expand_table(struct datapath *dp)
{
- struct xflow_flow_put uf;
+ struct tbl *old_table = rcu_dereference(dp->table);
+ struct tbl *new_table;
+
+ new_table = tbl_expand(old_table);
+ if (IS_ERR(new_table))
+ return PTR_ERR(new_table);
+
+ rcu_assign_pointer(dp->table, new_table);
+ tbl_deferred_destroy(old_table, NULL);
+
+ return 0;
+ }
+
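For contrast, a generic reader-side sketch of the RCU pattern expand_table() uses (tbl_lookup(), flow_hash(), and flow_cmp() appear with these signatures elsewhere in this patch; note the ioctl paths here actually rely on dp->mutex rather than an explicit rcu_read_lock).

/* Sketch: readers dereference the table inside an RCU read section; a
 * table retired with tbl_deferred_destroy() stays valid until they finish. */
static int flow_exists(struct datapath *dp, struct xflow_key *key)
{
	int present;

	rcu_read_lock();
	present = tbl_lookup(rcu_dereference(dp->table), key,
			     flow_hash(key), flow_cmp) != NULL;
	rcu_read_unlock();
	return present;
}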
-static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
- struct odp_flow_stats *stats)
++static int do_put_flow(struct datapath *dp, struct xflow_flow_put *uf,
++ struct xflow_flow_stats *stats)
+ {
+ struct tbl_node *flow_node;
struct sw_flow *flow;
- struct dp_table *table;
- struct xflow_flow_stats stats;
+ struct tbl *table;
int error;
- error = -EFAULT;
- if (copy_from_user(&uf, ufp, sizeof(struct xflow_flow_put)))
- goto error;
- memset(uf->flow.key.reserved, 0, sizeof uf->flow.key.reserved);
--
table = rcu_dereference(dp->table);
- flow = dp_table_lookup(table, &uf.flow.key);
- if (!flow) {
+ flow_node = tbl_lookup(table, &uf->flow.key, flow_hash(&uf->flow.key), flow_cmp);
+ if (!flow_node) {
/* No such flow. */
struct sw_flow_actions *acts;
error = -ENOENT;
- if (!(uf.flags & XFLOWPF_CREATE))
- if (!(uf->flags & ODPPF_CREATE))
++ if (!(uf->flags & XFLOWPF_CREATE))
goto error;
/* Expand table, if necessary, to make room. */
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- error = dp_table_insert(table, flow);
+ error = tbl_insert(table, &flow->tbl_node, flow_hash(&flow->key));
if (error)
goto error_free_flow_acts;
- dp->n_flows++;
- memset(&stats, 0, sizeof(struct xflow_flow_stats));
+
- memset(stats, 0, sizeof(struct odp_flow_stats));
++ memset(stats, 0, sizeof(struct xflow_flow_stats));
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts, *new_acts;
/* Bail out if we're not allowed to modify an existing flow. */
error = -EEXIST;
- if (!(uf.flags & XFLOWPF_MODIFY))
- if (!(uf->flags & ODPPF_MODIFY))
++ if (!(uf->flags & XFLOWPF_MODIFY))
goto error;
/* Swap actions. */
}
/* Fetch stats, then clear them if necessary. */
- spin_lock_irqsave(&flow->lock, flags);
- get_stats(flow, &stats);
- if (uf.flags & XFLOWPF_ZERO_STATS)
+ spin_lock_bh(&flow->lock);
+ get_stats(flow, stats);
- if (uf->flags & ODPPF_ZERO_STATS)
++ if (uf->flags & XFLOWPF_ZERO_STATS)
clear_stats(flow);
- spin_unlock_irqrestore(&flow->lock, flags);
+ spin_unlock_bh(&flow->lock);
}
- /* Copy stats to userspace. */
- if (__copy_to_user(&ufp->flow.stats, &stats,
- sizeof(struct xflow_flow_stats)))
- return -EFAULT;
return 0;
error_free_flow_acts:
return error;
}
- static int put_actions(const struct sw_flow *flow, struct xflow_flow __user *ufp)
-static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
++static int put_flow(struct datapath *dp, struct xflow_flow_put __user *ufp)
+ {
- struct odp_flow_stats stats;
- struct odp_flow_put uf;
++ struct xflow_flow_stats stats;
++ struct xflow_flow_put uf;
+ int error;
+
- if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
++ if (copy_from_user(&uf, ufp, sizeof(struct xflow_flow_put)))
+ return -EFAULT;
+
+ error = do_put_flow(dp, &uf, &stats);
+ if (error)
+ return error;
+
+ if (copy_to_user(&ufp->flow.stats, &stats,
- sizeof(struct odp_flow_stats)))
++ sizeof(struct xflow_flow_stats)))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ static int do_answer_query(struct sw_flow *flow, u32 query_flags,
- struct odp_flow_stats __user *ustats,
- union odp_action __user *actions,
++ struct xflow_flow_stats __user *ustats,
++ union xflow_action __user *actions,
+ u32 __user *n_actionsp)
{
- union xflow_action __user *actions;
struct sw_flow_actions *sf_acts;
- struct odp_flow_stats stats;
++ struct xflow_flow_stats stats;
u32 n_actions;
- if (__get_user(actions, &ufp->actions) ||
- __get_user(n_actions, &ufp->n_actions))
+ spin_lock_bh(&flow->lock);
+ get_stats(flow, &stats);
- if (query_flags & ODPFF_ZERO_TCP_FLAGS)
++ if (query_flags & XFLOWFF_ZERO_TCP_FLAGS)
+ flow->tcp_flags = 0;
+
+ spin_unlock_bh(&flow->lock);
+
- if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) ||
++ if (copy_to_user(ustats, &stats, sizeof(struct xflow_flow_stats)) ||
+ get_user(n_actions, n_actionsp))
return -EFAULT;
if (!n_actions)
return 0;
sf_acts = rcu_dereference(flow->sf_acts);
- if (__put_user(sf_acts->n_actions, &ufp->n_actions) ||
+ if (put_user(sf_acts->n_actions, n_actionsp) ||
(actions && copy_to_user(actions, sf_acts->actions,
- sizeof(union odp_action) *
+ sizeof(union xflow_action) *
min(sf_acts->n_actions, n_actions))))
return -EFAULT;
}
static int answer_query(struct sw_flow *flow, u32 query_flags,
- struct odp_flow __user *ufp)
+ struct xflow_flow __user *ufp)
{
- struct xflow_flow_stats stats;
- unsigned long int flags;
- union odp_action *actions;
++ union xflow_action *actions;
- spin_lock_irqsave(&flow->lock, flags);
- get_stats(flow, &stats);
+ if (get_user(actions, &ufp->actions))
+ return -EFAULT;
- if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) {
- flow->tcp_flags = 0;
- }
- spin_unlock_irqrestore(&flow->lock, flags);
+ return do_answer_query(flow, query_flags,
+ &ufp->stats, actions, &ufp->n_actions);
+ }
- if (__copy_to_user(&ufp->stats, &stats, sizeof(struct xflow_flow_stats)))
- return -EFAULT;
- return put_actions(flow, ufp);
-static struct sw_flow *do_del_flow(struct datapath *dp, struct odp_flow_key *key)
++static struct sw_flow *do_del_flow(struct datapath *dp, struct xflow_key *key)
+ {
+ struct tbl *table = rcu_dereference(dp->table);
+ struct tbl_node *flow_node;
+ int error;
+
- memset(key->reserved, 0, sizeof key->reserved);
+ flow_node = tbl_lookup(table, key, flow_hash(key), flow_cmp);
+ if (!flow_node)
+ return ERR_PTR(-ENOENT);
+
+ error = tbl_remove(table, flow_node);
+ if (error)
+ return ERR_PTR(error);
+
+ /* XXX The returned flow's statistics might lose a few packets, since
+ * other CPUs can be using this flow. We used to synchronize_rcu() to
+ * make sure that we get completely accurate stats, but that blows our
+ * performance, badly. */
+ return flow_cast(flow_node);
}
-static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
+static int del_flow(struct datapath *dp, struct xflow_flow __user *ufp)
{
- struct dp_table *table = rcu_dereference(dp->table);
- struct xflow_flow uf;
struct sw_flow *flow;
- struct odp_flow uf;
++ struct xflow_flow uf;
int error;
- error = -EFAULT;
if (copy_from_user(&uf, ufp, sizeof uf))
- goto error;
-
- flow = dp_table_lookup(table, &uf.key);
- error = -ENOENT;
- if (!flow)
- goto error;
+ return -EFAULT;
- /* XXX redundant lookup */
- error = dp_table_delete(table, flow);
- if (error)
- goto error;
+ flow = do_del_flow(dp, &uf.key);
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
- /* XXX These statistics might lose a few packets, since other CPUs can
- * be using this flow. We used to synchronize_rcu() to make sure that
- * we get completely accurate stats, but that blows our performance,
- * badly. */
- dp->n_flows--;
error = answer_query(flow, 0, ufp);
flow_deferred_free(flow);
-
- error:
return error;
}
- static int query_flows(struct datapath *dp, const struct xflow_flowvec *flowvec)
-static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
++static int do_query_flows(struct datapath *dp, const struct xflow_flowvec *flowvec)
{
- struct dp_table *table = rcu_dereference(dp->table);
- int i;
+ struct tbl *table = rcu_dereference(dp->table);
+ u32 i;
+
for (i = 0; i < flowvec->n_flows; i++) {
- struct __user xflow_flow *ufp = &flowvec->flows[i];
- struct odp_flow __user *ufp = &flowvec->flows[i];
- struct odp_flow uf;
++ struct xflow_flow __user *ufp = &flowvec->flows[i];
+ struct xflow_flow uf;
- struct sw_flow *flow;
+ struct tbl_node *flow_node;
int error;
- if (__copy_from_user(&uf, ufp, sizeof uf))
+ if (copy_from_user(&uf, ufp, sizeof uf))
return -EFAULT;
- memset(uf.key.reserved, 0, sizeof uf.key.reserved);
- flow = dp_table_lookup(table, &uf.key);
- if (!flow)
- error = __put_user(ENOENT, &ufp->stats.error);
+ flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+ if (!flow_node)
+ error = put_user(ENOENT, &ufp->stats.error);
else
- error = answer_query(flow, uf.flags, ufp);
+ error = answer_query(flow_cast(flow_node), uf.flags, ufp);
if (error)
return -EFAULT;
}
}
struct list_flows_cbdata {
- struct odp_flow __user *uflows;
+ struct xflow_flow __user *uflows;
- int n_flows;
- int listed_flows;
+ u32 n_flows;
+ u32 listed_flows;
};
- static int list_flow(struct sw_flow *flow, void *cbdata_)
+ static int list_flow(struct tbl_node *node, void *cbdata_)
{
+ struct sw_flow *flow = flow_cast(node);
struct list_flows_cbdata *cbdata = cbdata_;
- struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
+ struct xflow_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
int error;
- if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
+ if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
return -EFAULT;
error = answer_query(flow, 0, ufp);
if (error)
return 0;
}
- static int list_flows(struct datapath *dp, const struct xflow_flowvec *flowvec)
-static int do_list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
++static int do_list_flows(struct datapath *dp, const struct xflow_flowvec *flowvec)
{
struct list_flows_cbdata cbdata;
int error;
static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
int (*function)(struct datapath *,
- const struct odp_flowvec *))
+ const struct xflow_flowvec *))
{
- struct odp_flowvec __user *uflowvec;
- struct odp_flowvec flowvec;
+ struct xflow_flowvec __user *uflowvec;
+ struct xflow_flowvec flowvec;
int retval;
- uflowvec = (struct odp_flowvec __user *)argp;
+ uflowvec = (struct xflow_flowvec __user *)argp;
- if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) ||
- copy_from_user(&flowvec, uflowvec, sizeof flowvec))
+ if (copy_from_user(&flowvec, uflowvec, sizeof flowvec))
return -EFAULT;
- if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow))
+ if (flowvec.n_flows > INT_MAX / sizeof(struct xflow_flow))
return -EINVAL;
- if (!access_ok(VERIFY_WRITE, flowvec.flows,
- flowvec.n_flows * sizeof(struct xflow_flow)))
- return -EFAULT;
-
retval = function(dp, &flowvec);
return (retval < 0 ? retval
: retval == flowvec.n_flows ? 0
- : __put_user(retval, &uflowvec->n_flows));
+ : put_user(retval, &uflowvec->n_flows));
}
- static int do_execute(struct datapath *dp, const struct xflow_execute *executep)
-static int do_execute(struct datapath *dp, const struct odp_execute *execute)
++static int do_execute(struct datapath *dp, const struct xflow_execute *execute)
{
- struct xflow_execute execute;
- struct odp_flow_key key;
+ struct xflow_key key;
struct sk_buff *skb;
struct sw_flow_actions *actions;
struct ethhdr *eth;
return err;
}
-static int execute_packet(struct datapath *dp, const struct odp_execute __user *executep)
++static int execute_packet(struct datapath *dp, const struct xflow_execute __user *executep)
+ {
- struct odp_execute execute;
++ struct xflow_execute execute;
+
+ if (copy_from_user(&execute, executep, sizeof execute))
+ return -EFAULT;
+
+ return do_execute(dp, &execute);
+ }
+
-static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
+static int get_dp_stats(struct datapath *dp, struct xflow_stats __user *statsp)
{
- struct odp_stats stats;
+ struct tbl *table = rcu_dereference(dp->table);
+ struct xflow_stats stats;
int i;
- stats.n_flows = dp->n_flows;
- stats.cur_capacity = rcu_dereference(dp->table)->n_buckets;
- stats.max_capacity = DP_MAX_BUCKETS;
+ stats.n_flows = tbl_count(table);
+ stats.cur_capacity = tbl_n_buckets(table);
+ stats.max_capacity = TBL_MAX_BUCKETS;
stats.n_ports = dp->n_ports;
stats.max_ports = DP_MAX_PORTS;
stats.max_groups = DP_MAX_GROUPS;
}
static int
- put_port(const struct net_bridge_port *p, struct xflow_port __user *uop)
-put_port(const struct dp_port *p, struct odp_port __user *uop)
++put_port(const struct dp_port *p, struct xflow_port __user *uop)
{
- struct odp_port op;
+ struct xflow_port op;
+
memset(&op, 0, sizeof op);
- strncpy(op.devname, p->dev->name, sizeof op.devname);
+
+ rcu_read_lock();
+ strncpy(op.devname, vport_get_name(p->vport), sizeof op.devname);
+ rcu_read_unlock();
+
op.port = p->port_no;
- op.flags = is_dp_dev(p->dev) ? XFLOW_PORT_INTERNAL : 0;
- op.flags = is_internal_vport(p->vport) ? ODP_PORT_INTERNAL : 0;
++ op.flags = is_internal_vport(p->vport) ? XFLOW_PORT_INTERNAL : 0;
+
return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0;
}
}
static int
- list_ports(struct datapath *dp, struct xflow_portvec __user *pvp)
-do_list_ports(struct datapath *dp, struct odp_port __user *uports, int n_ports)
++do_list_ports(struct datapath *dp, struct xflow_port __user *uports, int n_ports)
{
- struct xflow_portvec pv;
- struct net_bridge_port *p;
- int idx;
+ int idx = 0;
+ if (n_ports) {
+ struct dp_port *p;
- if (copy_from_user(&pv, pvp, sizeof pv))
- return -EFAULT;
-
- idx = 0;
- if (pv.n_ports) {
list_for_each_entry_rcu (p, &dp->port_list, node) {
- if (put_port(p, &pv.ports[idx]))
+ if (put_port(p, &uports[idx]))
return -EFAULT;
- if (idx++ >= pv.n_ports)
+ if (idx++ >= n_ports)
break;
}
}
- return put_user(dp->n_ports, &pvp->n_ports);
+ return idx;
+ }
+
+ static int
-list_ports(struct datapath *dp, struct odp_portvec __user *upv)
++list_ports(struct datapath *dp, struct xflow_portvec __user *upv)
+ {
- struct odp_portvec pv;
++ struct xflow_portvec pv;
+ int retval;
+
+ if (copy_from_user(&pv, upv, sizeof pv))
+ return -EFAULT;
+
+ retval = do_list_ports(dp, pv.ports, pv.n_ports);
+ if (retval < 0)
+ return retval;
+
+ return put_user(retval, &upv->n_ports);
}
/* RCU callback for freeing a dp_port_group */
}
static int
- get_port_group(struct datapath *dp, struct xflow_port_group *upg)
-set_port_group(struct datapath *dp, const struct odp_port_group __user *upg)
++set_port_group(struct datapath *dp, const struct xflow_port_group __user *upg)
{
- struct odp_port_group pg;
+ struct xflow_port_group pg;
- struct dp_port_group *g;
- u16 n_copy;
if (copy_from_user(&pg, upg, sizeof pg))
return -EFAULT;
return 0;
}
-static int get_port_group(struct datapath *dp, struct odp_port_group __user *upg)
++static int get_port_group(struct datapath *dp, struct xflow_port_group __user *upg)
+ {
- struct odp_port_group pg;
++ struct xflow_port_group pg;
+
+ if (copy_from_user(&pg, upg, sizeof pg))
+ return -EFAULT;
+
+ return do_get_port_group(dp, pg.ports, pg.n_ports, pg.group, &pg.n_ports);
+ }
+
static int get_listen_mask(const struct file *f)
{
return (long)f->private_data;
err = destroy_dp(dp_idx);
goto exit;
- case XFLOW_PORT_ADD:
- err = add_port(dp_idx, (struct xflow_port __user *)argp);
- case ODP_PORT_ATTACH:
- err = attach_port(dp_idx, (struct odp_port __user *)argp);
++ case XFLOW_PORT_ATTACH:
++ err = attach_port(dp_idx, (struct xflow_port __user *)argp);
goto exit;
- case XFLOW_PORT_DEL:
- case ODP_PORT_DETACH:
++ case XFLOW_PORT_DETACH:
err = get_user(port_no, (int __user *)argp);
if (!err)
- err = del_port(dp_idx, port_no);
+ err = detach_port(dp_idx, port_no);
+ goto exit;
+
- case ODP_VPORT_ADD:
- err = vport_add((struct odp_vport_add __user *)argp);
++ case XFLOW_VPORT_ADD:
++ err = vport_add((struct xflow_vport_add __user *)argp);
+ goto exit;
+
- case ODP_VPORT_MOD:
- err = vport_mod((struct odp_vport_mod __user *)argp);
++ case XFLOW_VPORT_MOD:
++ err = vport_mod((struct xflow_vport_mod __user *)argp);
+ goto exit;
+
- case ODP_VPORT_DEL:
++ case XFLOW_VPORT_DEL:
+ err = vport_del((char __user *)argp);
+ goto exit;
+
- case ODP_VPORT_STATS_GET:
- err = vport_stats_get((struct odp_vport_stats_req __user *)argp);
++ case XFLOW_VPORT_STATS_GET:
++ err = vport_stats_get((struct xflow_vport_stats_req __user *)argp);
+ goto exit;
+
- case ODP_VPORT_ETHER_GET:
- err = vport_ether_get((struct odp_vport_ether __user *)argp);
++ case XFLOW_VPORT_ETHER_GET:
++ err = vport_ether_get((struct xflow_vport_ether __user *)argp);
+ goto exit;
+
- case ODP_VPORT_ETHER_SET:
- err = vport_ether_set((struct odp_vport_ether __user *)argp);
++ case XFLOW_VPORT_ETHER_SET:
++ err = vport_ether_set((struct xflow_vport_ether __user *)argp);
+ goto exit;
+
- case ODP_VPORT_MTU_GET:
- err = vport_mtu_get((struct odp_vport_mtu __user *)argp);
++ case XFLOW_VPORT_MTU_GET:
++ err = vport_mtu_get((struct xflow_vport_mtu __user *)argp);
+ goto exit;
+
- case ODP_VPORT_MTU_SET:
- err = vport_mtu_set((struct odp_vport_mtu __user *)argp);
++ case XFLOW_VPORT_MTU_SET:
++ err = vport_mtu_set((struct xflow_vport_mtu __user *)argp);
goto exit;
}
err = flush_flows(dp);
break;
- case ODP_FLOW_PUT:
- err = put_flow(dp, (struct odp_flow_put __user *)argp);
+ case XFLOW_FLOW_PUT:
+ err = put_flow(dp, (struct xflow_flow_put __user *)argp);
break;
- case ODP_FLOW_DEL:
- err = del_flow(dp, (struct odp_flow __user *)argp);
+ case XFLOW_FLOW_DEL:
+ err = del_flow(dp, (struct xflow_flow __user *)argp);
break;
- case ODP_FLOW_GET:
+ case XFLOW_FLOW_GET:
- err = do_flowvec_ioctl(dp, argp, query_flows);
+ err = do_flowvec_ioctl(dp, argp, do_query_flows);
break;
- case ODP_FLOW_LIST:
+ case XFLOW_FLOW_LIST:
- err = do_flowvec_ioctl(dp, argp, list_flows);
+ err = do_flowvec_ioctl(dp, argp, do_list_flows);
break;
- case ODP_EXECUTE:
- err = execute_packet(dp, (struct odp_execute __user *)argp);
+ case XFLOW_EXECUTE:
- err = do_execute(dp, (struct xflow_execute __user *)argp);
++ err = execute_packet(dp, (struct xflow_execute __user *)argp);
break;
default:
return 0;
}
-static int compat_list_ports(struct datapath *dp, struct compat_odp_portvec __user *upv)
+ #ifdef CONFIG_COMPAT
- struct compat_odp_portvec pv;
++static int compat_list_ports(struct datapath *dp, struct compat_xflow_portvec __user *upv)
+ {
-static int compat_set_port_group(struct datapath *dp, const struct compat_odp_port_group __user *upg)
++ struct compat_xflow_portvec pv;
+ int retval;
+
+ if (copy_from_user(&pv, upv, sizeof pv))
+ return -EFAULT;
+
+ retval = do_list_ports(dp, compat_ptr(pv.ports), pv.n_ports);
+ if (retval < 0)
+ return retval;
+
+ return put_user(retval, &upv->n_ports);
+ }
+
- struct compat_odp_port_group pg;
++static int compat_set_port_group(struct datapath *dp, const struct compat_xflow_port_group __user *upg)
+ {
-static int compat_get_port_group(struct datapath *dp, struct compat_odp_port_group __user *upg)
++ struct compat_xflow_port_group pg;
+
+ if (copy_from_user(&pg, upg, sizeof pg))
+ return -EFAULT;
+
+ return do_set_port_group(dp, compat_ptr(pg.ports), pg.n_ports, pg.group);
+ }
+
- struct compat_odp_port_group pg;
++static int compat_get_port_group(struct datapath *dp, struct compat_xflow_port_group __user *upg)
+ {
-static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
++ struct compat_xflow_port_group pg;
+
+ if (copy_from_user(&pg, upg, sizeof pg))
+ return -EFAULT;
+
+ return do_get_port_group(dp, compat_ptr(pg.ports), pg.n_ports,
+ pg.group, &pg.n_ports);
+ }
+
- if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
- __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
- __copy_from_user(&flow->key, &compat->key, sizeof(struct odp_flow_key)) ||
++static int compat_get_flow(struct xflow_flow *flow, const struct compat_xflow_flow __user *compat)
+ {
+ compat_uptr_t actions;
+
-static int compat_put_flow(struct datapath *dp, struct compat_odp_flow_put __user *ufp)
++ if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_xflow_flow)) ||
++ __copy_from_user(&flow->stats, &compat->stats, sizeof(struct xflow_flow_stats)) ||
++ __copy_from_user(&flow->key, &compat->key, sizeof(struct xflow_key)) ||
+ __get_user(actions, &compat->actions) ||
+ __get_user(flow->n_actions, &compat->n_actions) ||
+ __get_user(flow->flags, &compat->flags))
+ return -EFAULT;
+
+ flow->actions = compat_ptr(actions);
+ return 0;
+ }
+
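The compat_uptr_t handling above is the usual 32-bit compat idiom; a minimal self-contained sketch with invented struct and field names:

/* Sketch: widen a 32-bit userspace pointer with compat_ptr() before use. */
struct compat_example {
	compat_uptr_t buf;	/* 32-bit user pointer */
	u32 len;
};

static void __user *compat_example_buf(const struct compat_example __user *uex)
{
	compat_uptr_t buf;

	if (__get_user(buf, &uex->buf))
		return NULL;
	return compat_ptr(buf);
}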
- struct odp_flow_stats stats;
- struct odp_flow_put fp;
++static int compat_put_flow(struct datapath *dp, struct compat_xflow_flow_put __user *ufp)
+ {
- sizeof(struct odp_flow_stats)))
++ struct xflow_flow_stats stats;
++ struct xflow_flow_put fp;
+ int error;
+
+ if (compat_get_flow(&fp.flow, &ufp->flow) ||
+ get_user(fp.flags, &ufp->flags))
+ return -EFAULT;
+
+ error = do_put_flow(dp, &fp, &stats);
+ if (error)
+ return error;
+
+ if (copy_to_user(&ufp->flow.stats, &stats,
- struct compat_odp_flow __user *ufp)
++ sizeof(struct xflow_flow_stats)))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ static int compat_answer_query(struct sw_flow *flow, u32 query_flags,
-static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *ufp)
++ struct compat_xflow_flow __user *ufp)
+ {
+ compat_uptr_t actions;
+
+ if (get_user(actions, &ufp->actions))
+ return -EFAULT;
+
+ return do_answer_query(flow, query_flags, &ufp->stats,
+ compat_ptr(actions), &ufp->n_actions);
+ }
+
- struct odp_flow uf;
++static int compat_del_flow(struct datapath *dp, struct compat_xflow_flow __user *ufp)
+ {
+ struct sw_flow *flow;
-static int compat_query_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows)
++ struct xflow_flow uf;
+ int error;
+
+ if (compat_get_flow(&uf, ufp))
+ return -EFAULT;
+
+ flow = do_del_flow(dp, &uf.key);
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
+
+ error = compat_answer_query(flow, 0, ufp);
+ flow_deferred_free(flow);
+ return error;
+ }
+
- struct compat_odp_flow __user *ufp = &flows[i];
- struct odp_flow uf;
++static int compat_query_flows(struct datapath *dp, struct compat_xflow_flow *flows, u32 n_flows)
+ {
+ struct tbl *table = rcu_dereference(dp->table);
+ u32 i;
+
+ for (i = 0; i < n_flows; i++) {
- struct compat_odp_flow __user *uflows;
++ struct compat_xflow_flow __user *ufp = &flows[i];
++ struct xflow_flow uf;
+ struct tbl_node *flow_node;
+ int error;
+
+ if (compat_get_flow(&uf, ufp))
+ return -EFAULT;
+ memset(uf.key.reserved, 0, sizeof uf.key.reserved);
+
+ flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+ if (!flow_node)
+ error = put_user(ENOENT, &ufp->stats.error);
+ else
+ error = compat_answer_query(flow_cast(flow_node), uf.flags, ufp);
+ if (error)
+ return -EFAULT;
+ }
+ return n_flows;
+ }
+
+ struct compat_list_flows_cbdata {
- struct compat_odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
++ struct compat_xflow_flow __user *uflows;
+ u32 n_flows;
+ u32 listed_flows;
+ };
+
+ static int compat_list_flow(struct tbl_node *node, void *cbdata_)
+ {
+ struct sw_flow *flow = flow_cast(node);
+ struct compat_list_flows_cbdata *cbdata = cbdata_;
-static int compat_list_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows)
++ struct compat_xflow_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
+ int error;
+
+ if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
+ return -EFAULT;
+ error = compat_answer_query(flow, 0, ufp);
+ if (error)
+ return error;
+
+ if (cbdata->listed_flows >= cbdata->n_flows)
+ return cbdata->listed_flows;
+ return 0;
+ }
+
- struct compat_odp_flow *,
++static int compat_list_flows(struct datapath *dp, struct compat_xflow_flow *flows, u32 n_flows)
+ {
+ struct compat_list_flows_cbdata cbdata;
+ int error;
+
+ if (!n_flows)
+ return 0;
+
+ cbdata.uflows = flows;
+ cbdata.n_flows = n_flows;
+ cbdata.listed_flows = 0;
+ error = tbl_foreach(rcu_dereference(dp->table), compat_list_flow, &cbdata);
+ return error ? error : cbdata.listed_flows;
+ }
+
+ static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp,
+ int (*function)(struct datapath *,
- struct compat_odp_flowvec __user *uflowvec;
- struct compat_odp_flow __user *flows;
- struct compat_odp_flowvec flowvec;
++ struct compat_xflow_flow *,
+ u32 n_flows))
+ {
- if (flowvec.n_flows > INT_MAX / sizeof(struct compat_odp_flow))
++ struct compat_xflow_flowvec __user *uflowvec;
++ struct compat_xflow_flow __user *flows;
++ struct compat_xflow_flowvec flowvec;
+ int retval;
+
+ uflowvec = compat_ptr(argp);
+ if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) ||
+ copy_from_user(&flowvec, uflowvec, sizeof flowvec))
+ return -EFAULT;
+
- flowvec.n_flows * sizeof(struct compat_odp_flow)))
++ if (flowvec.n_flows > INT_MAX / sizeof(struct compat_xflow_flow))
+ return -EINVAL;
+
+ flows = compat_ptr(flowvec.flows);
+ if (!access_ok(VERIFY_WRITE, flows,
-static int compat_execute(struct datapath *dp, const struct compat_odp_execute __user *uexecute)
++ flowvec.n_flows * sizeof(struct compat_xflow_flow)))
+ return -EFAULT;
+
+ retval = function(dp, flows, flowvec.n_flows);
+ return (retval < 0 ? retval
+ : retval == flowvec.n_flows ? 0
+ : put_user(retval, &uflowvec->n_flows));
+ }
+
- struct odp_execute execute;
++static int compat_execute(struct datapath *dp, const struct compat_xflow_execute __user *uexecute)
+ {
- if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_odp_execute)) ||
++ struct xflow_execute execute;
+ compat_uptr_t actions;
+ compat_uptr_t data;
+
- case ODP_DP_DESTROY:
- case ODP_FLOW_FLUSH:
++ if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_xflow_execute)) ||
+ __get_user(execute.in_port, &uexecute->in_port) ||
+ __get_user(actions, &uexecute->actions) ||
+ __get_user(execute.n_actions, &uexecute->n_actions) ||
+ __get_user(data, &uexecute->data) ||
+ __get_user(execute.length, &uexecute->length))
+ return -EFAULT;
+
+ execute.actions = compat_ptr(actions);
+ execute.data = compat_ptr(data);
+
+ return do_execute(dp, &execute);
+ }
+
+ static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
+ {
+ int dp_idx = iminor(f->f_dentry->d_inode);
+ struct datapath *dp;
+ int err;
+
+ switch (cmd) {
- case ODP_DP_CREATE:
- case ODP_PORT_ATTACH:
- case ODP_PORT_DETACH:
- case ODP_VPORT_DEL:
- case ODP_VPORT_MTU_SET:
- case ODP_VPORT_MTU_GET:
- case ODP_VPORT_ETHER_SET:
- case ODP_VPORT_ETHER_GET:
- case ODP_VPORT_STATS_GET:
- case ODP_DP_STATS:
- case ODP_GET_DROP_FRAGS:
- case ODP_SET_DROP_FRAGS:
- case ODP_SET_LISTEN_MASK:
- case ODP_GET_LISTEN_MASK:
- case ODP_SET_SFLOW_PROBABILITY:
- case ODP_GET_SFLOW_PROBABILITY:
- case ODP_PORT_QUERY:
++ case XFLOW_DP_DESTROY:
++ case XFLOW_FLOW_FLUSH:
+ /* Ioctls that don't need any translation at all. */
+ return openvswitch_ioctl(f, cmd, argp);
+
- case ODP_VPORT_ADD32:
++ case XFLOW_DP_CREATE:
++ case XFLOW_PORT_ATTACH:
++ case XFLOW_PORT_DETACH:
++ case XFLOW_VPORT_DEL:
++ case XFLOW_VPORT_MTU_SET:
++ case XFLOW_VPORT_MTU_GET:
++ case XFLOW_VPORT_ETHER_SET:
++ case XFLOW_VPORT_ETHER_GET:
++ case XFLOW_VPORT_STATS_GET:
++ case XFLOW_DP_STATS:
++ case XFLOW_GET_DROP_FRAGS:
++ case XFLOW_SET_DROP_FRAGS:
++ case XFLOW_SET_LISTEN_MASK:
++ case XFLOW_GET_LISTEN_MASK:
++ case XFLOW_SET_SFLOW_PROBABILITY:
++ case XFLOW_GET_SFLOW_PROBABILITY:
++ case XFLOW_PORT_QUERY:
+ /* Ioctls that just need their pointer argument extended. */
+ return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
+
- case ODP_VPORT_MOD32:
++ case XFLOW_VPORT_ADD32:
+ return compat_vport_add(compat_ptr(argp));
+
- case ODP_PORT_LIST32:
++ case XFLOW_VPORT_MOD32:
+ return compat_vport_mod(compat_ptr(argp));
+ }
+
+ dp = get_dp_locked(dp_idx);
+ err = -ENODEV;
+ if (!dp)
+ goto exit;
+
+ switch (cmd) {
- case ODP_PORT_GROUP_SET32:
++ case XFLOW_PORT_LIST32:
+ err = compat_list_ports(dp, compat_ptr(argp));
+ break;
+
- case ODP_PORT_GROUP_GET32:
++ case XFLOW_PORT_GROUP_SET32:
+ err = compat_set_port_group(dp, compat_ptr(argp));
+ break;
+
- case ODP_FLOW_PUT32:
++ case XFLOW_PORT_GROUP_GET32:
+ err = compat_get_port_group(dp, compat_ptr(argp));
+ break;
+
- case ODP_FLOW_DEL32:
++ case XFLOW_FLOW_PUT32:
+ err = compat_put_flow(dp, compat_ptr(argp));
+ break;
+
- case ODP_FLOW_GET32:
++ case XFLOW_FLOW_DEL32:
+ err = compat_del_flow(dp, compat_ptr(argp));
+ break;
+
- case ODP_FLOW_LIST32:
++ case XFLOW_FLOW_GET32:
+ err = compat_flowvec_ioctl(dp, argp, compat_query_flows);
+ break;
+
- case ODP_EXECUTE32:
++ case XFLOW_FLOW_LIST32:
+ err = compat_flowvec_ioctl(dp, argp, compat_list_flows);
+ break;
+
++ case XFLOW_EXECUTE32:
+ err = compat_execute(dp, compat_ptr(argp));
+ break;
+
+ default:
+ err = -ENOIOCTLCMD;
+ break;
+ }
+ mutex_unlock(&dp->mutex);
+ exit:
+ return err;
+ }
+ #endif
+
ssize_t openvswitch_read(struct file *f, char __user *buf, size_t nbytes,
loff_t *ppos)
{
* @waitqueue: Waitqueue, for waiting for new packets in @queues.
* @n_flows: Number of flows currently in flow table.
* @table: Current flow table (RCU protected).
- * @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected).
+ * @groups: Port groups, used by XFLOWAT_OUTPUT_GROUP action (RCU protected).
* @n_ports: Number of ports currently in @ports.
- * @ports: Map from port number to &struct net_bridge_port. %XFLOWP_LOCAL port
- * @ports: Map from port number to &struct dp_port. %ODPP_LOCAL port
++ * @ports: Map from port number to &struct dp_port. %XFLOWP_LOCAL port
* always exists, other ports may be %NULL.
* @port_list: List of all ports in @ports in arbitrary order.
* @stats_percpu: Per-CPU datapath statistics.
static ssize_t show_bridge_id(DEVICE_PARAMS, char *buf)
{
- struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
- const unsigned char *addr = dp->ports[XFLOWP_LOCAL]->dev->dev_addr;
+ struct datapath *dp = sysfs_get_dp(to_net_dev(d));
- const unsigned char *addr = vport_get_addr(dp->ports[ODPP_LOCAL]->vport);
++ const unsigned char *addr = vport_get_addr(dp->ports[XFLOWP_LOCAL]->vport);
/* xxx Do we need a lock of some sort? */
return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n",
*/
int dp_sysfs_add_dp(struct datapath *dp)
{
- struct kobject *kobj = &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj;
- struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport);
++ struct kobject *kobj = vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport);
int err;
/* Create /sys/class/net/<devname>/bridge directory. */
int dp_sysfs_del_dp(struct datapath *dp)
{
- struct kobject *kobj = &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj;
- struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport);
++ struct kobject *kobj = vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport);
kobject_del(&dp->ifobj);
sysfs_remove_group(kobj, &bridge_group);
/* Create symlink from /sys/class/net/<devname>/brport/bridge to
* /sys/class/net/<bridgename>. */
- err = sysfs_create_link(&p->kobj,
- &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj,
- err = sysfs_create_link(&p->kobj, vport_get_kobj(dp->ports[ODPP_LOCAL]->vport),
++ err = sysfs_create_link(&p->kobj, vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport),
SYSFS_BRIDGE_PORT_LINK); /* "bridge" */
if (err)
goto err_del;
int nh_ofs;
memset(key, 0, sizeof *key);
+ key->tun_id = OVS_CB(skb)->tun_id;
key->in_port = in_port;
- key->dl_vlan = htons(ODP_VLAN_NONE);
++ key->dl_tci = htons(0);
if (skb->len < sizeof *eth)
return 0;
return retval;
}
-u32 flow_hash(const struct odp_flow_key *key)
+ struct sw_flow *flow_cast(const struct tbl_node *node)
+ {
+ return container_of(node, struct sw_flow, tbl_node);
+ }
+
- const struct odp_flow_key *key1 = &flow_cast(node)->key;
- const struct odp_flow_key *key2 = key2_;
++u32 flow_hash(const struct xflow_key *key)
+ {
+ return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed);
+ }
+
+ int flow_cmp(const struct tbl_node *node, void *key2_)
+ {
- return !memcmp(key1, key2, sizeof(struct odp_flow_key));
++ const struct xflow_key *key1 = &flow_cast(node)->key;
++ const struct xflow_key *key2 = key2_;
+
++ return !memcmp(key1, key2, sizeof(struct xflow_key));
+ }
+
/* Initializes the flow module.
* Returns zero if successful or a negative error code. */
int flow_init(void)
#include <linux/rcupdate.h>
#include <linux/gfp.h>
-#include "openvswitch/datapath-protocol.h"
+#include "openvswitch/xflow.h"
+ #include "table.h"
struct sk_buff;
struct sw_flow {
struct rcu_head rcu;
- struct odp_flow_key key;
+ struct tbl_node tbl_node;
+
+ struct xflow_key key;
struct sw_flow_actions *sf_acts;
struct timespec used; /* Last used time. */
extern struct kmem_cache *flow_cache;
struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
- void flow_free(struct sw_flow *);
void flow_deferred_free(struct sw_flow *);
void flow_deferred_free_acts(struct sw_flow_actions *);
-int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
+int flow_extract(struct sk_buff *, u16 in_port, struct xflow_key *);
void flow_used(struct sw_flow *, struct sk_buff *);
-u32 flow_hash(const struct odp_flow_key *key);
+ struct sw_flow *flow_cast(const struct tbl_node *);
++u32 flow_hash(const struct xflow_key *key);
+ int flow_cmp(const struct tbl_node *, void *target);
+ void flow_free_tbl(struct tbl_node *);
+
int flow_init(void);
void flow_exit(void);
linux-2.6/compat-2.6/include/linux/dmi.h \
linux-2.6/compat-2.6/include/linux/err.h \
linux-2.6/compat-2.6/include/linux/icmp.h \
+ linux-2.6/compat-2.6/include/linux/if.h \
linux-2.6/compat-2.6/include/linux/if_arp.h \
+ linux-2.6/compat-2.6/include/linux/if_ether.h \
+ linux-2.6/compat-2.6/include/linux/if_vlan.h \
+ linux-2.6/compat-2.6/include/linux/in.h \
+ linux-2.6/compat-2.6/include/linux/inetdevice.h \
linux-2.6/compat-2.6/include/linux/ip.h \
linux-2.6/compat-2.6/include/linux/ipv6.h \
linux-2.6/compat-2.6/include/linux/jiffies.h \
--- /dev/null
-netdev_get_stats(const struct vport *vport, struct odp_vport_stats *stats)
+ /*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+ #include <linux/if_arp.h>
+ #include <linux/if_bridge.h>
+ #include <linux/if_vlan.h>
+ #include <linux/kernel.h>
+ #include <linux/llc.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/skbuff.h>
+
+ #include <net/llc.h>
+
+ #include "datapath.h"
+ #include "vport-internal_dev.h"
+ #include "vport-netdev.h"
+
+ #include "compat.h"
+
+ struct vport_ops netdev_vport_ops;
+
+ static void netdev_port_receive(struct net_bridge_port *, struct sk_buff *);
+
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+ static struct llc_sap *netdev_stp_sap;
+
+ static int
+ netdev_stp_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+ {
+ /* We don't really care about STP packets; we listen for them only for
+ * mutual exclusion with the bridge module, so this just discards
+ * them. */
+ kfree_skb(skb);
+ return 0;
+ }
+
+ static int
+ netdev_avoid_bridge_init(void)
+ {
+ /* Register to receive STP packets because the bridge module also
+ * attempts to do so. Since there can only be a single listener for a
+ * given protocol, this provides mutual exclusion against the bridge
+ * module, preventing both of them from being loaded at the same
+ * time. */
+ netdev_stp_sap = llc_sap_open(LLC_SAP_BSPAN, netdev_stp_rcv);
+ if (!netdev_stp_sap) {
+ printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n");
+ return -EADDRINUSE;
+ }
+ return 0;
+ }
+
+ static void
+ netdev_avoid_bridge_exit(void)
+ {
+ llc_sap_put(netdev_stp_sap);
+ }
+ #else /* Linux 2.6.27 or later. */
+ static int
+ netdev_avoid_bridge_init(void)
+ {
+ /* Linux 2.6.27 introduces a way for multiple clients to register for
+ * STP packets, which interferes with what we try to do above.
+ * Instead, just check whether there's a bridge hook defined. This is
+ * not as safe--the bridge module is willing to load over the top of
+ * us--but it provides a little bit of protection. */
+ if (br_handle_frame_hook) {
+ printk(KERN_ERR "openvswitch: bridge module is loaded, cannot load over it\n");
+ return -EADDRINUSE;
+ }
+ return 0;
+ }
+
+ static void
+ netdev_avoid_bridge_exit(void)
+ {
+ /* Nothing to do. */
+ }
+ #endif /* Linux 2.6.27 or later */
+
+ /*
+ * Used as br_handle_frame_hook. (Cannot run the bridge at the same time,
+ * even on a different set of devices!)
+ */
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+ /* Called with rcu_read_lock and bottom-halves disabled. */
+ static struct sk_buff *
+ netdev_frame_hook(struct net_bridge_port *p, struct sk_buff *skb)
+ {
+ netdev_port_receive(p, skb);
+ return NULL;
+ }
+ #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ /* Called with rcu_read_lock and bottom-halves disabled. */
+ static int
+ netdev_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
+ {
+ netdev_port_receive(p, *pskb);
+ return 1;
+ }
+ #else
+ #error
+ #endif
+
+ static int
+ netdev_init(void)
+ {
+ int err;
+
+ err = netdev_avoid_bridge_init();
+ if (err)
+ return err;
+
+ /* Hook into callback used by the bridge to intercept packets.
+ * Parasites we are. */
+ br_handle_frame_hook = netdev_frame_hook;
+
+ return 0;
+ }
+
+ static void
+ netdev_exit(void)
+ {
+ br_handle_frame_hook = NULL;
+ netdev_avoid_bridge_exit();
+ }
+
+ static struct vport *
+ netdev_create(const char *name, const void __user *config)
+ {
+ struct vport *vport;
+ struct netdev_vport *netdev_vport;
+ int err;
+
+ vport = vport_alloc(sizeof(struct netdev_vport), &netdev_vport_ops);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ goto error;
+ }
+
+ netdev_vport = netdev_vport_priv(vport);
+
+ netdev_vport->dev = dev_get_by_name(&init_net, name);
+ if (!netdev_vport->dev) {
+ err = -ENODEV;
+ goto error_free_vport;
+ }
+
+ if (netdev_vport->dev->flags & IFF_LOOPBACK ||
+ netdev_vport->dev->type != ARPHRD_ETHER ||
+ is_internal_dev(netdev_vport->dev)) {
+ err = -EINVAL;
+ goto error_put;
+ }
+
+ if (netdev_vport->dev->br_port) {
+ err = -EBUSY;
+ goto error_put;
+ }
+
+ return vport;
+
+ error_put:
+ dev_put(netdev_vport->dev);
+ error_free_vport:
+ vport_free(vport);
+ error:
+ return ERR_PTR(err);
+ }
+
+ static int
+ netdev_destroy(struct vport *vport)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ dev_put(netdev_vport->dev);
+ vport_free(vport);
+
+ return 0;
+ }
+
+ static int
+ netdev_attach(struct vport *vport)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ dev_set_promiscuity(netdev_vport->dev, 1);
+ dev_disable_lro(netdev_vport->dev);
+ rcu_assign_pointer(netdev_vport->dev->br_port, (struct net_bridge_port *)vport);
+
+ return 0;
+ }
+
+ static int
+ netdev_detach(struct vport *vport)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ rcu_assign_pointer(netdev_vport->dev->br_port, NULL);
+ dev_set_promiscuity(netdev_vport->dev, -1);
+
+ return 0;
+ }
+
+ int
+ netdev_set_mtu(struct vport *vport, int mtu)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return dev_set_mtu(netdev_vport->dev, mtu);
+ }
+
+ int
+ netdev_set_addr(struct vport *vport, const unsigned char *addr)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ struct sockaddr sa;
+
+ sa.sa_family = ARPHRD_ETHER;
+ memcpy(sa.sa_data, addr, ETH_ALEN);
+
+ return dev_set_mac_address(netdev_vport->dev, &sa);
+ }
+
+ const char *
+ netdev_get_name(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->name;
+ }
+
+ const unsigned char *
+ netdev_get_addr(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->dev_addr;
+ }
+
+ struct kobject *
+ netdev_get_kobj(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return &netdev_vport->dev->NETDEV_DEV_MEMBER.kobj;
+ }
+
+ int
++netdev_get_stats(const struct vport *vport, struct xflow_vport_stats *stats)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ const struct net_device_stats *netdev_stats;
+
+ netdev_stats = dev_get_stats(netdev_vport->dev);
+
+ stats->rx_bytes = netdev_stats->rx_bytes;
+ stats->rx_packets = netdev_stats->rx_packets;
+ stats->tx_bytes = netdev_stats->tx_bytes;
+ stats->tx_packets = netdev_stats->tx_packets;
+ stats->rx_dropped = netdev_stats->rx_dropped;
+ stats->rx_errors = netdev_stats->rx_errors;
+ stats->rx_frame_err = netdev_stats->rx_frame_errors;
+ stats->rx_over_err = netdev_stats->rx_over_errors;
+ stats->rx_crc_err = netdev_stats->rx_crc_errors;
+ stats->tx_dropped = netdev_stats->tx_dropped;
+ stats->tx_errors = netdev_stats->tx_errors;
+ stats->collisions = netdev_stats->collisions;
+
+ return 0;
+ }
+
+ unsigned
+ netdev_get_dev_flags(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return dev_get_flags(netdev_vport->dev);
+ }
+
+ int
+ netdev_is_running(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netif_running(netdev_vport->dev);
+ }
+
+ unsigned char
+ netdev_get_operstate(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->operstate;
+ }
+
+ int
+ netdev_get_ifindex(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->ifindex;
+ }
+
+ int
+ netdev_get_iflink(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->iflink;
+ }
+
+ int
+ netdev_get_mtu(const struct vport *vport)
+ {
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->mtu;
+ }
+
+ /* Must be called with rcu_read_lock. */
+ static void
+ netdev_port_receive(struct net_bridge_port *p, struct sk_buff *skb)
+ {
+ struct vport *vport = (struct vport *)p;
+
+ /* Make our own copy of the packet. Otherwise we will mangle the
+ * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+ * (No one comes after us, since we tell handle_bridge() that we took
+ * the packet.) */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ /* Push the Ethernet header back on. */
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ compute_ip_summed(skb, false);
+
+ vport_receive(vport, skb);
+ }
+
+ static int
+ netdev_send(struct vport *vport, struct sk_buff *skb)
+ {
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ int len = skb->len;
+
+ skb->dev = netdev_vport->dev;
+ forward_ip_summed(skb);
+ dev_queue_xmit(skb);
+
+ return len;
+ }
+
+ /* Returns null if this device is not attached to a datapath. */
+ struct vport *
+ netdev_get_vport(struct net_device *dev)
+ {
+ return (struct vport *)dev->br_port;
+ }
+
+ struct vport_ops netdev_vport_ops = {
+ .type = "netdev",
+ .flags = VPORT_F_REQUIRED,
+ .init = netdev_init,
+ .exit = netdev_exit,
+ .create = netdev_create,
+ .destroy = netdev_destroy,
+ .attach = netdev_attach,
+ .detach = netdev_detach,
+ .set_mtu = netdev_set_mtu,
+ .set_addr = netdev_set_addr,
+ .get_name = netdev_get_name,
+ .get_addr = netdev_get_addr,
+ .get_kobj = netdev_get_kobj,
+ .get_stats = netdev_get_stats,
+ .get_dev_flags = netdev_get_dev_flags,
+ .is_running = netdev_is_running,
+ .get_operstate = netdev_get_operstate,
+ .get_ifindex = netdev_get_ifindex,
+ .get_iflink = netdev_get_iflink,
+ .get_mtu = netdev_get_mtu,
+ .send = netdev_send,
+ };
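To show how this ops interface composes, a hedged sketch of a hypothetical extra vport type follows. All names are invented; it assumes a zero-sized private area is acceptable and that unimplemented callbacks are optional, neither of which this patch spells out.

static struct vport_ops dummy_vport_ops;

static struct vport *dummy_create(const char *name, const void __user *config)
{
	return vport_alloc(0, &dummy_vport_ops);
}

static int dummy_destroy(struct vport *vport)
{
	vport_free(vport);
	return 0;
}

static int dummy_send(struct vport *vport, struct sk_buff *skb)
{
	int len = skb->len;

	kfree_skb(skb);		/* a null device: accept and drop everything */
	return len;
}

static struct vport_ops dummy_vport_ops = {
	.type = "dummy",
	.flags = VPORT_F_GEN_STATS,	/* let the generic layer keep stats */
	.create = dummy_create,
	.destroy = dummy_destroy,
	.send = dummy_send,
};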
--- /dev/null
-int netdev_get_stats(const struct vport *, struct odp_vport_stats *);
+ /*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+ #ifndef VPORT_NETDEV_H
+ #define VPORT_NETDEV_H 1
+
+ #include <linux/netdevice.h>
+
+ #include "vport.h"
+
+ struct vport *netdev_get_vport(struct net_device *dev);
+
+ struct netdev_vport {
+ struct net_device *dev;
+ };
+
+ static inline struct netdev_vport *
+ netdev_vport_priv(const struct vport *vport)
+ {
+ return vport_priv(vport);
+ }
+
+ int netdev_set_mtu(struct vport *, int mtu);
+ int netdev_set_addr(struct vport *, const unsigned char *addr);
+ const char *netdev_get_name(const struct vport *);
+ const unsigned char *netdev_get_addr(const struct vport *);
+ struct kobject *netdev_get_kobj(const struct vport *);
++int netdev_get_stats(const struct vport *, struct xflow_vport_stats *);
+ unsigned netdev_get_dev_flags(const struct vport *);
+ int netdev_is_running(const struct vport *);
+ unsigned char netdev_get_operstate(const struct vport *);
+ int netdev_get_ifindex(const struct vport *);
+ int netdev_get_iflink(const struct vport *);
+ int netdev_get_mtu(const struct vport *);
+
+ #endif /* vport_netdev.h */
--- /dev/null
-do_vport_add(struct odp_vport_add *vport_config)
+ /*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+ #include <linux/dcache.h>
+ #include <linux/etherdevice.h>
+ #include <linux/if.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
+ #include <linux/mutex.h>
+ #include <linux/percpu.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/compat.h>
+
+ #include "vport.h"
+
+ extern struct vport_ops netdev_vport_ops;
+ extern struct vport_ops internal_vport_ops;
+ extern struct vport_ops gre_vport_ops;
+
+ static struct vport_ops *base_vport_ops_list[] = {
+ &netdev_vport_ops,
+ &internal_vport_ops,
+ &gre_vport_ops,
+ };
+
+ static const struct vport_ops **vport_ops_list;
+ static int n_vport_types;
+
+ static struct hlist_head *dev_table;
+ #define VPORT_HASH_BUCKETS 1024
+
+ /* Both RTNL lock and vport_mutex need to be held when updating dev_table.
+ *
+ * If you use vport_locate and then perform some operations, you need to hold
+ * one of these locks if you don't want the vport to be deleted out from under
+ * you.
+ *
+ * If you get a reference to a vport through a dp_port, it is protected
+ * by RCU and you need to hold rcu_read_lock instead when reading.
+ *
+ * If multiple locks are taken, the hierarchy is:
+ * 1. RTNL
+ * 2. DP
+ * 3. vport
+ */
+ static DEFINE_MUTEX(vport_mutex);
+
+ /**
+ * vport_lock - acquire vport lock
+ *
+ * Acquire global vport lock. See above comment about locking requirements
+ * and specific function definitions. May sleep.
+ */
+ void
+ vport_lock(void)
+ {
+ mutex_lock(&vport_mutex);
+ }
+
+ /**
+ * vport_unlock - release vport lock
+ *
+ * Release lock acquired with vport_lock.
+ */
+ void
+ vport_unlock(void)
+ {
+ mutex_unlock(&vport_mutex);
+ }
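+ 
+ /* Illustrative sketch, not part of this patch: a caller that needs all
+ * three locks from the hierarchy documented above acquires them in order
+ * and releases them in reverse, e.g.:
+ *
+ *     rtnl_lock();
+ *     mutex_lock(&dp->mutex);
+ *     vport_lock();
+ *     ... update dev_table or per-vport state ...
+ *     vport_unlock();
+ *     mutex_unlock(&dp->mutex);
+ *     rtnl_unlock();
+ *
+ * Here 'dp' is a hypothetical struct datapath pointer; which of these
+ * locks a given function actually requires is stated in its comment.
+ */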
+
+ #define ASSERT_VPORT() do { \
+ if (unlikely(!mutex_is_locked(&vport_mutex))) { \
+ printk(KERN_ERR "openvswitch: vport lock not held at %s (%d)\n", \
+ __FILE__, __LINE__); \
+ dump_stack(); \
+ } \
+ } while(0)
+
+ /**
+ * vport_init - initialize vport subsystem
+ *
+ * Called at module load time to initialize the vport subsystem and any
+ * compiled in vport types.
+ */
+ int
+ vport_init(void)
+ {
+ int err;
+ int i;
+
+ dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dev_table) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ vport_ops_list = kmalloc(ARRAY_SIZE(base_vport_ops_list) *
+ sizeof(struct vport_ops *), GFP_KERNEL);
+ if (!vport_ops_list) {
+ err = -ENOMEM;
+ goto error_dev_table;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(base_vport_ops_list); i++) {
+ struct vport_ops *new_ops = base_vport_ops_list[i];
+
+ if (new_ops->get_stats && new_ops->flags & VPORT_F_GEN_STATS) {
+ printk(KERN_INFO "openvswitch: both get_stats() and VPORT_F_GEN_STATS defined on vport %s, dropping VPORT_F_GEN_STATS\n", new_ops->type);
+ new_ops->flags &= ~VPORT_F_GEN_STATS;
+ }
+
+ if (new_ops->init)
+ err = new_ops->init();
+ else
+ err = 0;
+
+ if (!err)
+ vport_ops_list[n_vport_types++] = new_ops;
+ else if (new_ops->flags & VPORT_F_REQUIRED) {
+ vport_exit();
+ goto error;
+ }
+ }
+
+ return 0;
+
+ error_dev_table:
+ kfree(dev_table);
+ error:
+ return err;
+ }
+
+ static void
+ vport_del_all(void)
+ {
+ int i;
+
+ rtnl_lock();
+ vport_lock();
+
+ for (i = 0; i < VPORT_HASH_BUCKETS; i++) {
+ struct hlist_head *bucket = &dev_table[i];
+ struct vport *vport;
+ struct hlist_node *node, *next;
+
+ hlist_for_each_entry_safe(vport, node, next, bucket, hash_node)
+ __vport_del(vport);
+ }
+
+ vport_unlock();
+ rtnl_unlock();
+ }
+
+ /**
+ * vport_exit - shut down vport subsystem
+ *
+ * Called at module exit time to shut down the vport subsystem and any
+ * initialized vport types.
+ */
+ void
+ vport_exit(void)
+ {
+ int i;
+
+ vport_del_all();
+
+ for (i = 0; i < n_vport_types; i++) {
+ if (vport_ops_list[i]->exit)
+ vport_ops_list[i]->exit();
+ }
+
+ kfree(vport_ops_list);
+ kfree(dev_table);
+ }
+
+ /**
+ * vport_add - add vport device (for userspace callers)
+ *
+ * @uvport_config: New port configuration.
+ *
+ * Creates a new vport with the specified configuration (which is dependent
+ * on device type). This function is for userspace callers and assumes no
+ * locks are held.
+ */
+ static int
-vport_add(const struct odp_vport_add __user *uvport_config)
++do_vport_add(struct xflow_vport_add *vport_config)
+ {
+ struct vport *vport;
+ int err = 0;
+
+ vport_config->port_type[VPORT_TYPE_SIZE - 1] = '\0';
+ vport_config->devname[IFNAMSIZ - 1] = '\0';
+
+ rtnl_lock();
+
+ vport = vport_locate(vport_config->devname);
+ if (vport) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ vport_lock();
+ vport = __vport_add(vport_config->devname, vport_config->port_type,
+ vport_config->config);
+ vport_unlock();
+
+ if (IS_ERR(vport))
+ err = PTR_ERR(vport);
+
+ out:
+ rtnl_unlock();
+ return err;
+ }
+
+ int
- struct odp_vport_add vport_config;
++vport_add(const struct xflow_vport_add __user *uvport_config)
+ {
- if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_add)))
++ struct xflow_vport_add vport_config;
+
-compat_vport_add(struct compat_odp_vport_add *ucompat)
++ if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_add)))
+ return -EFAULT;
+
+ return do_vport_add(&vport_config);
+ }
+
+ #ifdef CONFIG_COMPAT
+ int
- struct compat_odp_vport_add compat;
- struct odp_vport_add vport_config;
++compat_vport_add(struct compat_xflow_vport_add __user *ucompat)
+ {
- if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_add)))
++ struct compat_xflow_vport_add compat;
++ struct xflow_vport_add vport_config;
+
-do_vport_mod(struct odp_vport_mod *vport_config)
++ if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_add)))
+ return -EFAULT;
+
+ memcpy(vport_config.port_type, compat.port_type, VPORT_TYPE_SIZE);
+ memcpy(vport_config.devname, compat.devname, IFNAMSIZ);
+ vport_config.config = compat_ptr(compat.config);
+
+ return do_vport_add(&vport_config);
+ }
+ #endif
+
+ /**
+ * vport_mod - modify existing vport device (for userspace callers)
+ *
+ * @uvport_config: New configuration for vport
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type). This function is for userspace callers and
+ * assumes no locks are held.
+ */
+ static int
-vport_mod(const struct odp_vport_mod __user *uvport_config)
++do_vport_mod(struct xflow_vport_mod *vport_config)
+ {
+ struct vport *vport;
+ int err;
+
+ vport_config->devname[IFNAMSIZ - 1] = '\0';
+
+ rtnl_lock();
+
+ vport = vport_locate(vport_config->devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ vport_lock();
+ err = __vport_mod(vport, vport_config->config);
+ vport_unlock();
+
+ out:
+ rtnl_unlock();
+ return err;
+ }
+
+ int
- struct odp_vport_mod vport_config;
++vport_mod(const struct xflow_vport_mod __user *uvport_config)
+ {
- if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_mod)))
++ struct xflow_vport_mod vport_config;
+
-compat_vport_mod(struct compat_odp_vport_mod *ucompat)
++ if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_mod)))
+ return -EFAULT;
+
+ return do_vport_mod(&vport_config);
+ }
+
+ #ifdef CONFIG_COMPAT
+ int
- struct compat_odp_vport_mod compat;
- struct odp_vport_mod vport_config;
++compat_vport_mod(struct compat_xflow_vport_mod __user *ucompat)
+ {
- if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_mod)))
++ struct compat_xflow_vport_mod compat;
++ struct xflow_vport_mod vport_config;
+
-vport_stats_get(struct odp_vport_stats_req __user *ustats_req)
++ if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_mod)))
+ return -EFAULT;
+
+ memcpy(vport_config.devname, compat.devname, IFNAMSIZ);
+ vport_config.config = compat_ptr(compat.config);
+
+ return do_vport_mod(&vport_config);
+ }
+ #endif
+
+ /**
+ * vport_del - delete existing vport device (for userspace callers)
+ *
+ * @udevname: Name of device to delete
+ *
+ * Deletes the specified device. Detaches the device from a datapath first
+ * if it is attached. Deleting the device will fail if it does not exist or it
+ * is the datapath local port. It is also possible to fail for less obvious
+ * reasons, such as lack of memory. This function is for userspace callers and
+ * assumes no locks are held.
+ */
+ int
+ vport_del(const char __user *udevname)
+ {
+ char devname[IFNAMSIZ];
+ struct vport *vport;
+ struct dp_port *dp_port;
+ int err = 0;
+ int retval;
+
+ retval = strncpy_from_user(devname, udevname, IFNAMSIZ);
+ if (retval < 0)
+ return -EFAULT;
+ else if (retval >= IFNAMSIZ)
+ return -ENAMETOOLONG;
+
+ rtnl_lock();
+
+ vport = vport_locate(devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dp_port = vport_get_dp_port(vport);
+ if (dp_port) {
+ struct datapath *dp = dp_port->dp;
+
+ mutex_lock(&dp->mutex);
+
+ if (!strcmp(dp_name(dp), devname)) {
+ err = -EINVAL;
+ goto dp_port_out;
+ }
+
+ err = dp_detach_port(dp_port, 0);
+
+ dp_port_out:
+ mutex_unlock(&dp->mutex);
+
+ if (err)
+ goto out;
+ }
+
+ vport_lock();
+ err = __vport_del(vport);
+ vport_unlock();
+
+ out:
+ rtnl_unlock();
+ return err;
+ }
+
+ /**
+ * vport_stats_get - retrieve device stats (for userspace callers)
+ *
+ * @ustats_req: Stats request parameters.
+ *
+ * Retrieves transmit, receive, and error stats for the given device. This
+ * function is for userspace callers and assumes no locks are held.
+ */
+ int
- struct odp_vport_stats_req stats_req;
++vport_stats_get(struct xflow_vport_stats_req __user *ustats_req)
+ {
- if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req)))
++ struct xflow_vport_stats_req stats_req;
+ struct vport *vport;
+ int err;
+
- memset(&stats_req.stats, 0, sizeof(struct odp_vport_stats));
++ if (copy_from_user(&stats_req, ustats_req, sizeof(struct xflow_vport_stats_req)))
+ return -EFAULT;
+
+ stats_req.devname[IFNAMSIZ - 1] = '\0';
+
+ vport_lock();
+
+ vport = vport_locate(stats_req.devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (vport->ops->get_stats) {
+ rcu_read_lock();
+ err = vport->ops->get_stats(vport, &stats_req.stats);
+ rcu_read_unlock();
+
+ } else if (vport->ops->flags & VPORT_F_GEN_STATS) {
+ int i;
+
- if (copy_to_user(ustats_req, &stats_req, sizeof(struct odp_vport_stats_req)))
++ memset(&stats_req.stats, 0, sizeof(struct xflow_vport_stats));
+
+ for_each_possible_cpu(i) {
+ const struct vport_percpu_stats *percpu_stats;
+
+ percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+ stats_req.stats.rx_bytes += percpu_stats->rx_bytes;
+ stats_req.stats.rx_packets += percpu_stats->rx_packets;
+ stats_req.stats.tx_bytes += percpu_stats->tx_bytes;
+ stats_req.stats.tx_packets += percpu_stats->tx_packets;
+ }
+
+ spin_lock_bh(&vport->err_stats.lock);
+
+ stats_req.stats.rx_dropped = vport->err_stats.rx_dropped;
+ stats_req.stats.rx_errors = vport->err_stats.rx_errors
+ + vport->err_stats.rx_frame_err
+ + vport->err_stats.rx_over_err
+ + vport->err_stats.rx_crc_err;
+ stats_req.stats.rx_frame_err = vport->err_stats.rx_frame_err;
+ stats_req.stats.rx_over_err = vport->err_stats.rx_over_err;
+ stats_req.stats.rx_crc_err = vport->err_stats.rx_crc_err;
+ stats_req.stats.tx_dropped = vport->err_stats.tx_dropped;
+ stats_req.stats.tx_errors = vport->err_stats.tx_errors;
+ stats_req.stats.collisions = vport->err_stats.collisions;
+
+ spin_unlock_bh(&vport->err_stats.lock);
+
+ err = 0;
+ } else
+ err = -EOPNOTSUPP;
+
+ out:
+ vport_unlock();
+
+ if (!err)
-vport_ether_get(struct odp_vport_ether __user *uvport_ether)
++ if (copy_to_user(ustats_req, &stats_req, sizeof(struct xflow_vport_stats_req)))
+ err = -EFAULT;
+
+ return err;
+ }
+
+ /**
+ * vport_ether_get - retrieve device Ethernet address (for userspace callers)
+ *
+ * @uvport_ether: Ethernet address request parameters.
+ *
+ * Retrieves the Ethernet address of the given device. This function is for
+ * userspace callers and assumes no locks are held.
+ */
+ int
- struct odp_vport_ether vport_ether;
++vport_ether_get(struct xflow_vport_ether __user *uvport_ether)
+ {
- if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether)))
++ struct xflow_vport_ether vport_ether;
+ struct vport *vport;
+ int err = 0;
+
- if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct odp_vport_ether)))
++ if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct xflow_vport_ether)))
+ return -EFAULT;
+
+ vport_ether.devname[IFNAMSIZ - 1] = '\0';
+
+ vport_lock();
+
+ vport = vport_locate(vport_ether.devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ rcu_read_lock();
+ memcpy(vport_ether.ether_addr, vport_get_addr(vport), ETH_ALEN);
+ rcu_read_unlock();
+
+ out:
+ vport_unlock();
+
+ if (!err)
-vport_ether_set(struct odp_vport_ether __user *uvport_ether)
++ if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct xflow_vport_ether)))
+ err = -EFAULT;
+
+ return err;
+ }
+
+ /**
+ * vport_ether_set - set device Ethernet address (for userspace callers)
+ *
+ * @uvport_ether: Ethernet address request parameters.
+ *
+ * Sets the Ethernet address of the given device. Some devices may not support
+ * setting the Ethernet address, in which case the result will always be
+ * -EOPNOTSUPP. This function is for userspace callers and assumes no locks
+ * are held.
+ */
+ int
- struct odp_vport_ether vport_ether;
++vport_ether_set(struct xflow_vport_ether __user *uvport_ether)
+ {
- if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether)))
++ struct xflow_vport_ether vport_ether;
+ struct vport *vport;
+ int err;
+
-vport_mtu_get(struct odp_vport_mtu __user *uvport_mtu)
++ if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct xflow_vport_ether)))
+ return -EFAULT;
+
+ vport_ether.devname[IFNAMSIZ - 1] = '\0';
+
+ rtnl_lock();
+ vport_lock();
+
+ vport = vport_locate(vport_ether.devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = vport_set_addr(vport, vport_ether.ether_addr);
+
+ out:
+ vport_unlock();
+ rtnl_unlock();
+ return err;
+ }
+
+ /**
+ * vport_mtu_get - retrieve device MTU (for userspace callers)
+ *
+ * @uvport_mtu: MTU request parameters.
+ *
+ * Retrieves the MTU of the given device. This function is for userspace
+ * callers and assumes no locks are held.
+ */
+ int
- struct odp_vport_mtu vport_mtu;
++vport_mtu_get(struct xflow_vport_mtu __user *uvport_mtu)
+ {
- if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu)))
++ struct xflow_vport_mtu vport_mtu;
+ struct vport *vport;
+ int err = 0;
+
- if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct odp_vport_mtu)))
++ if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct xflow_vport_mtu)))
+ return -EFAULT;
+
+ vport_mtu.devname[IFNAMSIZ - 1] = '\0';
+
+ vport_lock();
+
+ vport = vport_locate(vport_mtu.devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ vport_mtu.mtu = vport_get_mtu(vport);
+
+ out:
+ vport_unlock();
+
+ if (!err)
-vport_mtu_set(struct odp_vport_mtu __user *uvport_mtu)
++ if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct xflow_vport_mtu)))
+ err = -EFAULT;
+
+ return err;
+ }
+
+ /**
+ * vport_mtu_set - set device MTU (for userspace callers)
+ *
+ * @uvport_mtu: MTU request parameters.
+ *
+ * Sets the MTU of the given device. Some devices may not support setting the
+ * MTU, in which case the result will always be -EOPNOTSUPP. This function is
+ * for userspace callers and assumes no locks are held.
+ */
+ int
- struct odp_vport_mtu vport_mtu;
++vport_mtu_set(struct xflow_vport_mtu __user *uvport_mtu)
+ {
- if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu)))
++ struct xflow_vport_mtu vport_mtu;
+ struct vport *vport;
+ int err;
+
- return vport_get_ifindex(dp_port->dp->ports[ODPP_LOCAL]->vport);
++ if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct xflow_vport_mtu)))
+ return -EFAULT;
+
+ vport_mtu.devname[IFNAMSIZ - 1] = '\0';
+
+ rtnl_lock();
+ vport_lock();
+
+ vport = vport_locate(vport_mtu.devname);
+ if (!vport) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = vport_set_mtu(vport, vport_mtu.mtu);
+
+ out:
+ vport_unlock();
+ rtnl_unlock();
+ return err;
+ }
+
+ static struct hlist_head *
+ hash_bucket(const char *name)
+ {
+ unsigned int hash = full_name_hash(name, strlen(name));
+ return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
+ }
+
+ /**
+ * vport_locate - find a port that has already been created
+ *
+ * @name: name of port to find
+ *
+ * Either RTNL or vport lock must be acquired before calling this function
+ * and held while using the found port. See the locking comments at the
+ * top of the file.
+ */
+ struct vport *
+ vport_locate(const char *name)
+ {
+ struct hlist_head *bucket = hash_bucket(name);
+ struct vport *vport;
+ struct hlist_node *node;
+
+ if (unlikely(!mutex_is_locked(&vport_mutex) && !rtnl_is_locked())) {
+ printk(KERN_ERR "openvswitch: neither RTNL nor vport lock held in vport_locate\n");
+ dump_stack();
+ }
+
+ rcu_read_lock();
+
+ hlist_for_each_entry(vport, node, bucket, hash_node)
+ if (!strcmp(name, vport_get_name(vport)))
+ goto out;
+
+ vport = NULL;
+
+ out:
+ rcu_read_unlock();
+ return vport;
+ }
+
+ static void
+ register_vport(struct vport *vport)
+ {
+ hlist_add_head(&vport->hash_node, hash_bucket(vport_get_name(vport)));
+ }
+
+ static void
+ unregister_vport(struct vport *vport)
+ {
+ hlist_del(&vport->hash_node);
+ }
+
+ /**
+ * vport_alloc - allocate and initialize new vport
+ *
+ * @priv_size: Size of private data area to allocate.
+ * @ops: vport device ops
+ *
+ * Allocate and initialize a new vport defined by @ops. The vport will contain
+ * a private data area of size @priv_size that can be accessed using
+ * vport_priv(). vports that are no longer needed should be released with
+ * vport_free().
+ */
+ struct vport *
+ vport_alloc(int priv_size, const struct vport_ops *ops)
+ {
+ struct vport *vport;
+ size_t alloc_size;
+
+ alloc_size = sizeof(struct vport);
+ if (priv_size) {
+ alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
+ alloc_size += priv_size;
+ }
+
+ vport = kzalloc(alloc_size, GFP_KERNEL);
+ if (!vport)
+ return ERR_PTR(-ENOMEM);
+
+ vport->ops = ops;
+
+ if (vport->ops->flags & VPORT_F_GEN_STATS) {
+ vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+ if (!vport->percpu_stats) {
+ /* Don't leak the vport itself if the per-CPU allocation fails. */
+ kfree(vport);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&vport->err_stats.lock);
+ }
+
+ return vport;
+ }
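+ 
+ /* Illustrative sketch, not part of this patch: a vport implementation
+ * with per-port private data pairs vport_alloc() with vport_priv().
+ * 'foo_vport' and 'foo_vport_ops' are hypothetical names:
+ *
+ *     struct foo_vport { struct net_device *dev; };
+ *
+ *     static struct vport *foo_create(const char *name,
+ *                                     const void __user *config)
+ *     {
+ *         struct vport *vport;
+ *         struct foo_vport *foo;
+ *
+ *         vport = vport_alloc(sizeof(struct foo_vport), &foo_vport_ops);
+ *         if (IS_ERR(vport))
+ *             return vport;
+ *
+ *         foo = vport_priv(vport);
+ *         foo->dev = NULL;
+ *         return vport;
+ *     }
+ */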
+
+ /**
+ * vport_free - uninitialize and free vport
+ *
+ * @vport: vport to free
+ *
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ */
+ void
+ vport_free(struct vport *vport)
+ {
+ if (vport->ops->flags & VPORT_F_GEN_STATS)
+ free_percpu(vport->percpu_stats);
+
+ kfree(vport);
+ }
+
+ /**
+ * __vport_add - add vport device (for kernel callers)
+ *
+ * @name: Name of new device.
+ * @type: Type of new device (to be matched against types in registered vport
+ * ops).
+ * @config: Device type specific configuration. Userspace pointer.
+ *
+ * Creates a new vport with the specified configuration (which is dependent
+ * on device type). Both RTNL and vport locks must be held.
+ */
+ struct vport *
+ __vport_add(const char *name, const char *type, const void __user *config)
+ {
+ struct vport *vport;
+ int err = 0;
+ int i;
+
+ ASSERT_RTNL();
+ ASSERT_VPORT();
+
+ for (i = 0; i < n_vport_types; i++) {
+ if (!strcmp(vport_ops_list[i]->type, type)) {
+ vport = vport_ops_list[i]->create(name, config);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ goto out;
+ }
+
+ register_vport(vport);
+ return vport;
+ }
+ }
+
+ err = -EAFNOSUPPORT;
+
+ out:
+ return ERR_PTR(err);
+ }
+
+ /**
+ * __vport_mod - modify existing vport device (for kernel callers)
+ *
+ * @vport: vport to modify.
+ * @config: Device type specific configuration. Userspace pointer.
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type). Both RTNL and vport locks must be held.
+ */
+ int
+ __vport_mod(struct vport *vport, const void __user *config)
+ {
+ ASSERT_RTNL();
+ ASSERT_VPORT();
+
+ if (vport->ops->modify)
+ return vport->ops->modify(vport, config);
+ else
+ return -EOPNOTSUPP;
+ }
+
+ /**
+ * __vport_del - delete existing vport device (for kernel callers)
+ *
+ * @vport: vport to delete.
+ *
+ * Deletes the specified device. The device must not be currently attached to
+ * a datapath. It is possible to fail for reasons such as lack of memory.
+ * Both RTNL and vport locks must be held.
+ */
+ int
+ __vport_del(struct vport *vport)
+ {
+ ASSERT_RTNL();
+ ASSERT_VPORT();
+ BUG_ON(vport_get_dp_port(vport));
+
+ unregister_vport(vport);
+
+ return vport->ops->destroy(vport);
+ }
+
+ /**
+ * vport_attach - attach a vport to a datapath
+ *
+ * @vport: vport to attach.
+ * @dp_port: Datapath port to attach the vport to.
+ *
+ * Attaches a vport to a specific datapath so that packets may be exchanged.
+ * Both ports must be currently unattached. @dp_port must be successfully
+ * attached to a vport before it is connected to a datapath and must not be
+ * modified while connected. RTNL lock and the appropriate DP mutex must be held.
+ */
+ int
+ vport_attach(struct vport *vport, struct dp_port *dp_port)
+ {
+ ASSERT_RTNL();
+
+ if (dp_port->vport)
+ return -EBUSY;
+
+ if (vport_get_dp_port(vport))
+ return -EBUSY;
+
+ if (vport->ops->attach) {
+ int err;
+
+ err = vport->ops->attach(vport);
+ if (err)
+ return err;
+ }
+
+ dp_port->vport = vport;
+ rcu_assign_pointer(vport->dp_port, dp_port);
+
+ return 0;
+ }
+
+ /**
+ * vport_detach - detach a vport from a datapath
+ *
+ * @vport: vport to detach.
+ *
+ * Detaches a vport from a datapath. May fail for a variety of reasons,
+ * including lack of memory. RTNL lock and the appropriate DP mutex must be held.
+ */
+ int
+ vport_detach(struct vport *vport)
+ {
+ struct dp_port *dp_port;
+
+ ASSERT_RTNL();
+
+ dp_port = vport_get_dp_port(vport);
+ if (!dp_port)
+ return -EINVAL;
+
+ dp_port->vport = NULL;
+ rcu_assign_pointer(vport->dp_port, NULL);
+
+ if (vport->ops->detach)
+ return vport->ops->detach(vport);
+ else
+ return 0;
+ }
+
+ /**
+ * vport_set_mtu - set device MTU (for kernel callers)
+ *
+ * @vport: vport on which to set MTU.
+ * @mtu: New MTU.
+ *
+ * Sets the MTU of the given device. Some devices may not support setting the
+ * MTU, in which case the result will always be -EOPNOTSUPP. RTNL lock must
+ * be held.
+ */
+ int
+ vport_set_mtu(struct vport *vport, int mtu)
+ {
+ ASSERT_RTNL();
+
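+ /* 68 is the minimum datagram size that RFC 791 requires every IPv4
+ * module to handle without further fragmentation. */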
+ if (mtu < 68)
+ return -EINVAL;
+
+ if (vport->ops->set_mtu)
+ return vport->ops->set_mtu(vport, mtu);
+ else
+ return -EOPNOTSUPP;
+ }
+
+ /**
+ * vport_set_addr - set device Ethernet address (for kernel callers)
+ *
+ * @vport: vport on which to set Ethernet address.
+ * @addr: New address.
+ *
+ * Sets the Ethernet address of the given device. Some devices may not support
+ * setting the Ethernet address, in which case the result will always be
+ * -EOPNOTSUPP. RTNL lock must be held.
+ */
+ int
+ vport_set_addr(struct vport *vport, const unsigned char *addr)
+ {
+ ASSERT_RTNL();
+
+ if (!is_valid_ether_addr(addr))
+ return -EADDRNOTAVAIL;
+
+ if (vport->ops->set_addr)
+ return vport->ops->set_addr(vport, addr);
+ else
+ return -EOPNOTSUPP;
+ }
+
+ /**
+ * vport_get_name - retrieve device name
+ *
+ * @vport: vport from which to retrieve the name.
+ *
+ * Retrieves the name of the given device. Either RTNL lock or rcu_read_lock
+ * must be held for the entire duration that the name is in use.
+ */
+ const char *
+ vport_get_name(const struct vport *vport)
+ {
+ return vport->ops->get_name(vport);
+ }
+
+ /**
+ * vport_get_type - retrieve device type
+ *
+ * @vport: vport from which to retrieve the type.
+ *
+ * Retrieves the type of the given device. Either RTNL lock or rcu_read_lock
+ * must be held for the entire duration that the type is in use.
+ */
+ const char *
+ vport_get_type(const struct vport *vport)
+ {
+ return vport->ops->type;
+ }
+
+ /**
+ * vport_get_addr - retrieve device Ethernet address (for kernel callers)
+ *
+ * @vport: vport from which to retrieve the Ethernet address.
+ *
+ * Retrieves the Ethernet address of the given device. Either RTNL lock or
+ * rcu_read_lock must be held for the entire duration that the Ethernet address
+ * is in use.
+ */
+ const unsigned char *
+ vport_get_addr(const struct vport *vport)
+ {
+ return vport->ops->get_addr(vport);
+ }
+
+ /**
+ * vport_get_dp_port - retrieve attached datapath port
+ *
+ * @vport: vport from which to retrieve the datapath port.
+ *
+ * Retrieves the attached datapath port or null if not attached. Either RTNL
+ * lock or rcu_read_lock must be held for the entire duration that the datapath
+ * port is being accessed.
+ */
+ struct dp_port *
+ vport_get_dp_port(const struct vport *vport)
+ {
+ return rcu_dereference(vport->dp_port);
+ }
+
+ /**
+ * vport_get_kobj - retrieve associated kobj
+ *
+ * @vport: vport from which to retrieve the associated kobj
+ *
+ * Retrieves the associated kobj or null if no kobj. The returned kobj is
+ * valid for as long as the vport exists.
+ */
+ struct kobject *
+ vport_get_kobj(const struct vport *vport)
+ {
+ if (vport->ops->get_kobj)
+ return vport->ops->get_kobj(vport);
+ else
+ return NULL;
+ }
+
+ /**
+ * vport_get_flags - retrieve device flags
+ *
+ * @vport: vport from which to retrieve the flags
+ *
+ * Retrieves the flags of the given device. Either RTNL lock or rcu_read_lock
+ * must be held.
+ */
+ unsigned
+ vport_get_flags(const struct vport *vport)
+ {
+ return vport->ops->get_dev_flags(vport);
+ }
+
+ /**
+ * vport_is_running - check whether device is running
+ *
+ * @vport: vport on which to check status.
+ *
+ * Checks whether the given device is running. Either RTNL lock or
+ * rcu_read_lock must be held.
+ */
+ int
+ vport_is_running(const struct vport *vport)
+ {
+ return vport->ops->is_running(vport);
+ }
+
+ /**
+ * vport_get_operstate - retrieve device operating state
+ *
+ * @vport: vport from which to check status
+ *
+ * Retrieves the RFC2863 operstate of the given device. Either RTNL lock or
+ * rcu_read_lock must be held.
+ */
+ unsigned char
+ vport_get_operstate(const struct vport *vport)
+ {
+ return vport->ops->get_operstate(vport);
+ }
+
+ /**
+ * vport_get_ifindex - retrieve device system interface index
+ *
+ * @vport: vport from which to retrieve index
+ *
+ * Retrieves the system interface index of the given device. Not all devices
+ * will have system indexes, in which case the index of the datapath local
+ * port is returned. Returns a negative index on error. Either RTNL lock or
+ * rcu_read_lock must be held.
+ */
+ int
+ vport_get_ifindex(const struct vport *vport)
+ {
+ const struct dp_port *dp_port;
+
+ if (vport->ops->get_ifindex)
+ return vport->ops->get_ifindex(vport);
+
+ /* If we don't actually have an ifindex, use the local port's.
+ * Userspace doesn't check it anyway. */
+ dp_port = vport_get_dp_port(vport);
+ if (!dp_port)
+ return -EAGAIN;
+
++ return vport_get_ifindex(dp_port->dp->ports[XFLOWP_LOCAL]->vport);
+ }
+
+ /**
+ * vport_get_iflink - retrieve device system link index
+ *
+ * @vport: vport from which to retrieve index
+ *
+ * Retrieves the system link index of the given device. The link is the index
+ * of the interface on which the packet will actually be sent. In most cases
+ * this is the same as the ifindex but may be different for tunnel devices.
+ * Returns a negative index on error. Either RTNL lock or rcu_read_lock must
+ * be held.
+ */
+ int
+ vport_get_iflink(const struct vport *vport)
+ {
+ if (vport->ops->get_iflink)
+ return vport->ops->get_iflink(vport);
+
+ /* If we don't have an iflink, use the ifindex. In most cases they
+ * are the same. */
+ return vport_get_ifindex(vport);
+ }
+
+ /**
+ * vport_get_mtu - retrieve device MTU (for kernel callers)
+ *
+ * @vport: vport from which to retrieve MTU
+ *
+ * Retrieves the MTU of the given device. Either RTNL lock or rcu_read_lock
+ * must be held.
+ */
+ int
+ vport_get_mtu(const struct vport *vport)
+ {
+ return vport->ops->get_mtu(vport);
+ }
+
+ /**
+ * vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ *
+ * Must be called with rcu_read_lock. The packet cannot be shared and
+ * skb->data should point to the Ethernet header. The caller must have already
+ * called compute_ip_summed() to initialize the checksumming fields.
+ */
+ void
+ vport_receive(struct vport *vport, struct sk_buff *skb)
+ {
+ struct dp_port *dp_port = vport_get_dp_port(vport);
+
+ if (!dp_port) {
+ vport_record_error(vport, VPORT_E_RX_DROPPED);
+ kfree_skb(skb);
+
+ return;
+ }
+
+ if (vport->ops->flags & VPORT_F_GEN_STATS) {
+ struct vport_percpu_stats *stats;
+
+ local_bh_disable();
+
+ stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+
+ local_bh_enable();
+ }
+
+ if (!(vport->ops->flags & VPORT_F_TUN_ID))
+ OVS_CB(skb)->tun_id = 0;
+
+ dp_process_received_packet(dp_port, skb);
+ }
+
+ /**
+ * vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent. Either RTNL
+ * lock or rcu_read_lock must be held.
+ */
+ int
+ vport_send(struct vport *vport, struct sk_buff *skb)
+ {
+ int sent;
+
+ sent = vport->ops->send(vport, skb);
+
+ if (vport->ops->flags & VPORT_F_GEN_STATS && sent > 0) {
+ struct vport_percpu_stats *stats;
+
+ local_bh_disable();
+
+ stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+ stats->tx_packets++;
+ stats->tx_bytes += sent;
+
+ local_bh_enable();
+ }
+
+ return sent;
+ }
+
+ /**
+ * vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occurred.
+ */
+ void
+ vport_record_error(struct vport *vport, enum vport_err_type err_type)
+ {
+ if (vport->ops->flags & VPORT_F_GEN_STATS) {
+
+ spin_lock_bh(&vport->err_stats.lock);
+
+ switch (err_type) {
+ case VPORT_E_RX_DROPPED:
+ vport->err_stats.rx_dropped++;
+ break;
+
+ case VPORT_E_RX_ERROR:
+ vport->err_stats.rx_errors++;
+ break;
+
+ case VPORT_E_RX_FRAME:
+ vport->err_stats.rx_frame_err++;
+ break;
+
+ case VPORT_E_RX_OVER:
+ vport->err_stats.rx_over_err++;
+ break;
+
+ case VPORT_E_RX_CRC:
+ vport->err_stats.rx_crc_err++;
+ break;
+
+ case VPORT_E_TX_DROPPED:
+ vport->err_stats.tx_dropped++;
+ break;
+
+ case VPORT_E_TX_ERROR:
+ vport->err_stats.tx_errors++;
+ break;
+
+ case VPORT_E_COLLISION:
+ vport->err_stats.collisions++;
+ break;
+ }
+
+ spin_unlock_bh(&vport->err_stats.lock);
+ }
+ }
+
+ /**
+ * vport_gen_ether_addr - generate an Ethernet address
+ *
+ * @addr: location to store generated address
+ *
+ * Generates a random Ethernet address for use when creating a device that
+ * has no natural address.
+ */
+ void
+ vport_gen_ether_addr(u8 *addr)
+ {
+ random_ether_addr(addr);
+
+ /* Set the OUI to the Nicira one. */
+ addr[0] = 0x00;
+ addr[1] = 0x23;
+ addr[2] = 0x20;
+
+ /* Set the top bit to indicate random address. */
+ addr[3] |= 0x80;
+ }
--- /dev/null
-#include "openvswitch/datapath-protocol.h"
-#include "odp-compat.h"
+ /*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+ #ifndef VPORT_H
+ #define VPORT_H 1
+
+ #include <linux/list.h>
+ #include <linux/skbuff.h>
+ #include <linux/spinlock.h>
+
+ #include "datapath.h"
-int vport_add(const struct odp_vport_add __user *);
-int vport_mod(const struct odp_vport_mod __user *);
++#include "openvswitch/xflow.h"
++#include "xflow-compat.h"
+
+ struct vport;
+ struct dp_port;
+
+ /* The following definitions are for users of the vport subsystem: */
+
+ void vport_lock(void);
+ void vport_unlock(void);
+
+ int vport_init(void);
+ void vport_exit(void);
+
-int compat_vport_add(struct compat_odp_vport_add __user *);
-int compat_vport_mod(struct compat_odp_vport_mod __user *);
++int vport_add(const struct xflow_vport_add __user *);
++int vport_mod(const struct xflow_vport_mod __user *);
+ int vport_del(const char __user *udevname);
+
+ #ifdef CONFIG_COMPAT
-int vport_stats_get(struct odp_vport_stats_req __user *);
-int vport_ether_get(struct odp_vport_ether __user *);
-int vport_ether_set(struct odp_vport_ether __user *);
-int vport_mtu_get(struct odp_vport_mtu __user *);
-int vport_mtu_set(struct odp_vport_mtu __user *);
++int compat_vport_add(struct compat_xflow_vport_add __user *);
++int compat_vport_mod(struct compat_xflow_vport_mod __user *);
+ #endif
+
- int (*get_stats)(const struct vport *, struct odp_vport_stats *);
++int vport_stats_get(struct xflow_vport_stats_req __user *);
++int vport_ether_get(struct xflow_vport_ether __user *);
++int vport_ether_set(struct xflow_vport_ether __user *);
++int vport_mtu_get(struct xflow_vport_mtu __user *);
++int vport_mtu_set(struct xflow_vport_mtu __user *);
+
+ struct vport *__vport_add(const char *name, const char *type, const void __user *config);
+ int __vport_mod(struct vport *, const void __user *config);
+ int __vport_del(struct vport *);
+
+ struct vport *vport_locate(const char *name);
+
+ int vport_attach(struct vport *, struct dp_port *);
+ int vport_detach(struct vport *);
+
+ int vport_set_mtu(struct vport *, int mtu);
+ int vport_set_addr(struct vport *, const unsigned char *);
+
+ const char *vport_get_name(const struct vport *);
+ const char *vport_get_type(const struct vport *);
+ const unsigned char *vport_get_addr(const struct vport *);
+
+ struct dp_port *vport_get_dp_port(const struct vport *);
+
+ struct kobject *vport_get_kobj(const struct vport *);
+
+ unsigned vport_get_flags(const struct vport *);
+ int vport_is_running(const struct vport *);
+ unsigned char vport_get_operstate(const struct vport *);
+
+ int vport_get_ifindex(const struct vport *);
+ int vport_get_iflink(const struct vport *);
+
+ int vport_get_mtu(const struct vport *);
+
+ int vport_send(struct vport *, struct sk_buff *);
+
+ /* The following definitions are for implementers of vport devices: */
+
+ struct vport_percpu_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
+ };
+
+ struct vport_err_stats {
+ spinlock_t lock;
+
+ u64 rx_dropped;
+ u64 rx_errors;
+ u64 rx_frame_err;
+ u64 rx_over_err;
+ u64 rx_crc_err;
+ u64 tx_dropped;
+ u64 tx_errors;
+ u64 collisions;
+ };
+
+ struct vport {
+ struct hlist_node hash_node;
+ const struct vport_ops *ops;
+ struct dp_port *dp_port;
+
+ struct vport_percpu_stats *percpu_stats;
+ struct vport_err_stats err_stats;
+ };
+
+ #define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */
+ #define VPORT_F_GEN_STATS (1 << 1) /* Track stats at the generic layer. */
+ #define VPORT_F_TUN_ID (1 << 2) /* Sets OVS_CB(skb)->tun_id. */
+
+ /**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: Name of port type, such as "netdev" or "internal" to be matched
+ * against the device type when a new port needs to be created.
+ * @flags: Flags of type VPORT_F_* that influence how the generic vport layer
+ * handles this vport.
+ * @init: Called at module initialization. If VPORT_F_REQUIRED is set then the
+ * failure of this function will cause the module to not load. If the flag is
+ * not set and initialization fails then no vports of this type can be created.
+ * @exit: Called at module unload.
+ * @create: Create a new vport called 'name' with vport type specific
+ * configuration 'config' (which must be copied from userspace before use). On
+ * success must allocate a new vport using vport_alloc().
+ * @modify: Modify the configuration of an existing vport. May be null if
+ * modification is not supported.
+ * @destroy: Destroy and free a vport using vport_free(). Prior to destruction
+ * @detach will be called followed by synchronize_rcu().
+ * @attach: Attach a previously created vport to a datapath. After attachment
+ * packets may be sent and received. Prior to attachment any packets may be
+ * silently discarded. May be null if not needed.
+ * @detach: Detach a vport from a datapath. May be null if not needed.
+ * @set_mtu: Set the device's MTU. May be null if not supported.
+ * @set_addr: Set the device's MAC address. May be null if not supported.
+ * @get_name: Get the device's name.
+ * @get_addr: Get the device's MAC address.
+ * @get_kobj: Get the kobj associated with the device (may return null).
+ * @get_stats: Fill in the transmit/receive stats. May be null if stats are
+ * not supported or if generic stats are in use. If defined, overrides
+ * VPORT_F_GEN_STATS.
+ * @get_dev_flags: Get the device's flags.
+ * @is_running: Checks whether the device is running.
+ * @get_operstate: Get the device's operating state.
+ * @get_ifindex: Get the system interface index associated with the device.
+ * May be null if the device does not have an ifindex.
+ * @get_iflink: Get the system interface index associated with the device that
+ * will be used to send packets (may be different than ifindex for tunnels).
+ * May be null if the device does not have an iflink.
+ * @get_mtu: Get the device's MTU.
+ * @send: Send a packet on the device. Returns the length of the packet sent.
+ */
+ struct vport_ops {
+ const char *type;
+ u32 flags;
+
+ /* Called at module init and exit respectively. */
+ int (*init)(void);
+ void (*exit)(void);
+
+ /* Called with RTNL lock. */
+ struct vport *(*create)(const char *name, const void __user *config);
+ int (*modify)(struct vport *, const void __user *config);
+ int (*destroy)(struct vport *);
+
+ int (*attach)(struct vport *);
+ int (*detach)(struct vport *);
+
+ int (*set_mtu)(struct vport *, int mtu);
+ int (*set_addr)(struct vport *, const unsigned char *);
+
+ /* Called with rcu_read_lock or RTNL lock. */
+ const char *(*get_name)(const struct vport *);
+ const unsigned char *(*get_addr)(const struct vport *);
+ struct kobject *(*get_kobj)(const struct vport *);
++ int (*get_stats)(const struct vport *, struct xflow_vport_stats *);
+
+ unsigned (*get_dev_flags)(const struct vport *);
+ int (*is_running)(const struct vport *);
+ unsigned char (*get_operstate)(const struct vport *);
+
+ int (*get_ifindex)(const struct vport *);
+ int (*get_iflink)(const struct vport *);
+
+ int (*get_mtu)(const struct vport *);
+
+ int (*send)(struct vport *, struct sk_buff *);
+ };
+
+ enum vport_err_type {
+ VPORT_E_RX_DROPPED,
+ VPORT_E_RX_ERROR,
+ VPORT_E_RX_FRAME,
+ VPORT_E_RX_OVER,
+ VPORT_E_RX_CRC,
+ VPORT_E_TX_DROPPED,
+ VPORT_E_TX_ERROR,
+ VPORT_E_COLLISION,
+ };
+
+ struct vport *vport_alloc(int priv_size, const struct vport_ops *);
+ void vport_free(struct vport *);
+
+ #define VPORT_ALIGN 8
+
+ /**
+ * vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation. This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+ static inline void *
+ vport_priv(const struct vport *vport)
+ {
+ return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+ }
+
+ /**
+ * vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup. @priv must point to the start of the
+ * private data area.
+ */
+ static inline struct vport *
+ vport_from_priv(const void *priv)
+ {
+ return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+ }
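+ 
+ /* Illustrative sketch, not part of this patch: vport_priv() and
+ * vport_from_priv() are inverses, so an implementation that keeps its
+ * private struct in some lookup table can recover the owning vport.
+ * 'foo_vport' is a hypothetical implementation-private struct:
+ *
+ *     struct foo_vport *foo = vport_priv(vport);
+ *     struct vport *same_vport = vport_from_priv(foo);
+ */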
+
+ void vport_receive(struct vport *, struct sk_buff *);
+ void vport_record_error(struct vport *, enum vport_err_type err_type);
+ void vport_gen_ether_addr(u8 *addr);
+
+ #endif /* vport.h */
--- /dev/null
--- /dev/null
++/*
++ * Copyright (c) 2010 Nicira Networks.
++ * Distributed under the terms of the GNU GPL version 2.
++ *
++ * Significant portions of this file may be copied from parts of the Linux
++ * kernel, by Linus Torvalds and others.
++ */
++
++#ifndef XFLOW_COMPAT_H
++#define XFLOW_COMPAT_H 1
++
++/* 32-bit ioctl compatibility definitions for datapath protocol. */
++
++#ifdef CONFIG_COMPAT
++#include "openvswitch/datapath-protocol.h"
++#include <linux/compat.h>
++
++#define XFLOW_PORT_LIST32 _IOWR('O', 10, struct compat_xflow_portvec)
++#define XFLOW_PORT_GROUP_SET32 _IOR('O', 11, struct compat_xflow_port_group)
++#define XFLOW_PORT_GROUP_GET32 _IOWR('O', 12, struct compat_xflow_port_group)
++#define XFLOW_FLOW_GET32 _IOWR('O', 13, struct compat_xflow_flow)
++#define XFLOW_FLOW_PUT32 _IOWR('O', 14, struct compat_xflow_flow_put)
++#define XFLOW_FLOW_LIST32 _IOWR('O', 15, struct compat_xflow_flowvec)
++#define XFLOW_FLOW_DEL32 _IOWR('O', 17, struct compat_xflow_flow)
++#define XFLOW_EXECUTE32 _IOR('O', 18, struct compat_xflow_execute)
++#define XFLOW_VPORT_ADD32 _IOR('O', 21, struct compat_xflow_vport_add)
++#define XFLOW_VPORT_MOD32 _IOR('O', 22, struct compat_xflow_vport_mod)
++
++struct compat_xflow_portvec {
++ compat_uptr_t ports;
++ u32 n_ports;
++};
++
++struct compat_xflow_port_group {
++ compat_uptr_t ports;
++ u16 n_ports; /* Number of ports. */
++ u16 group; /* Group number. */
++};
++
++struct compat_xflow_flow {
++ struct xflow_flow_stats stats;
++ struct xflow_key key;
++ compat_uptr_t actions;
++ u32 n_actions;
++ u32 flags;
++};
++
++struct compat_xflow_flow_put {
++ struct compat_xflow_flow flow;
++ u32 flags;
++};
++
++struct compat_xflow_flowvec {
++ compat_uptr_t flows;
++ u32 n_flows;
++};
++
++struct compat_xflow_execute {
++ u16 in_port;
++ u16 reserved1;
++ u32 reserved2;
++
++ compat_uptr_t actions;
++ u32 n_actions;
++
++ compat_uptr_t data;
++ u32 length;
++};
++
++struct compat_xflow_vport_add {
++ char port_type[VPORT_TYPE_SIZE];
++ char devname[16]; /* IFNAMSIZ */
++ compat_uptr_t config;
++};
++
++struct compat_xflow_vport_mod {
++ char devname[16]; /* IFNAMSIZ */
++ compat_uptr_t config;
++};
++#endif /* CONFIG_COMPAT */
++
++#endif /* xflow-compat.h */
noinst_HEADERS += \
include/openvswitch/gre.h \
include/openvswitch/brcompat-netlink.h \
- include/openvswitch/datapath-protocol.h
+ include/openvswitch/internal_dev.h \
+ include/openvswitch/xflow.h
/*
-- * Copyright (c) 2008, 2009 Nicira Networks.
++ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* This file is offered under your choice of two licenses: Apache 2.0 or GNU
* GPL 2.0 or later. The permission statements for each of these licenses is
#ifndef OPENVSWITCH_GRE_H
#define OPENVSWITCH_GRE_H 1
- #include <linux/if_tunnel.h>
- #include <linux/version.h>
-#ifdef __KERNEL__
+ #include <linux/types.h>
-#else
-#include <sys/types.h>
-#endif
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
- #define GRE_IOCTL_ONLY
- #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
- enum
- {
- IFLA_GRE_UNSPEC,
- IFLA_GRE_LINK,
- IFLA_GRE_IFLAGS,
- IFLA_GRE_OFLAGS,
- IFLA_GRE_IKEY,
- IFLA_GRE_OKEY,
- IFLA_GRE_LOCAL,
- IFLA_GRE_REMOTE,
- IFLA_GRE_TTL,
- IFLA_GRE_TOS,
- IFLA_GRE_PMTUDISC,
- __IFLA_GRE_MAX,
- };
-
- #define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1)
- #endif
+ #define GRE_F_IN_CSUM (1 << 0) /* Require incoming packets to have checksums. */
+ #define GRE_F_OUT_CSUM (1 << 1) /* Checksum outgoing packets. */
+ #define GRE_F_IN_KEY_MATCH (1 << 2) /* Store the key in tun_id to match in flow table. */
+ #define GRE_F_OUT_KEY_ACTION (1 << 3) /* Get the key from a SET_TUNNEL action. */
+ #define GRE_F_TOS_INHERIT (1 << 4) /* Inherit the ToS from the inner packet. */
+ #define GRE_F_TTL_INHERIT (1 << 5) /* Inherit the TTL from the inner packet. */
+ #define GRE_F_PMTUD (1 << 6) /* Enable path MTU discovery. */
- #define GRE_IOCTL_DEVICE "gre0"
-
- #define SIOCGETGRETAP SIOCGETTUNNEL
- #define SIOCADDGRETAP (SIOCDEVPRIVATE + 10)
- #define SIOCDELGRETAP SIOCDELTUNNEL
- #define SIOCCHGGRETAP (SIOCDEVPRIVATE + 11)
+ struct gre_port_config {
+ __u32 flags;
+ __be32 saddr;
+ __be32 daddr;
+ __be32 in_key;
+ __be32 out_key;
+ __u8 tos;
+ __u8 ttl;
+ };
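+ 
+ /* Illustrative sketch, not part of this header: a userspace caller might
+ * configure a GRE port between 192.168.1.1 and 192.168.1.2, keyed on
+ * tunnel ID 1, with checksums required in both directions:
+ *
+ *     struct gre_port_config config = {
+ *         .flags = GRE_F_IN_CSUM | GRE_F_OUT_CSUM | GRE_F_PMTUD,
+ *         .saddr = htonl(0xc0a80101),
+ *         .daddr = htonl(0xc0a80102),
+ *         .in_key = htonl(1),
+ *         .out_key = htonl(1),
+ *         .ttl = 64,
+ *     };
+ */
+ 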
#endif /* openvswitch/gre.h */
--- /dev/null
- /* Protocol between userspace and kernel datapath. */
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * ----------------------------------------------------------------------
+ */
+
- #define XFLOW_PORT_ADD _IOR('O', 7, struct xflow_port)
- #define XFLOW_PORT_DEL _IOR('O', 8, int)
++/* Protocol between userspace and kernel datapath.
++ *
++ * Be sure to update datapath/xflow-compat.h if you change any of the
++ * structures in here. */
+
+#ifndef XFLOW_H
+#define XFLOW_H 1
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#endif
+#include <linux/if_ether.h>
+
+#define XFLOW_MAX 256 /* Maximum number of datapaths. */
+
+#define XFLOW_DP_CREATE _IO('O', 0)
+#define XFLOW_DP_DESTROY _IO('O', 1)
+#define XFLOW_DP_STATS _IOW('O', 2, struct xflow_stats)
+
+#define XFLOW_GET_DROP_FRAGS _IOW('O', 3, int)
+#define XFLOW_SET_DROP_FRAGS _IOR('O', 4, int)
+
+#define XFLOW_GET_LISTEN_MASK _IOW('O', 5, int)
+#define XFLOW_SET_LISTEN_MASK _IOR('O', 6, int)
+
- * @arg member is unused and set to 0.
- *
++#define XFLOW_PORT_ATTACH _IOR('O', 7, struct xflow_port)
++#define XFLOW_PORT_DETACH _IOR('O', 8, int)
+#define XFLOW_PORT_QUERY _IOWR('O', 9, struct xflow_port)
+#define XFLOW_PORT_LIST _IOWR('O', 10, struct xflow_portvec)
+
+#define XFLOW_PORT_GROUP_SET _IOR('O', 11, struct xflow_port_group)
+#define XFLOW_PORT_GROUP_GET _IOWR('O', 12, struct xflow_port_group)
+
+#define XFLOW_FLOW_GET _IOWR('O', 13, struct xflow_flow)
+#define XFLOW_FLOW_PUT _IOWR('O', 14, struct xflow_flow_put)
+#define XFLOW_FLOW_LIST _IOWR('O', 15, struct xflow_flowvec)
+#define XFLOW_FLOW_FLUSH _IO('O', 16)
+#define XFLOW_FLOW_DEL _IOWR('O', 17, struct xflow_flow)
+
+#define XFLOW_EXECUTE _IOR('O', 18, struct xflow_execute)
+
+#define XFLOW_SET_SFLOW_PROBABILITY _IOR('O', 19, int)
+#define XFLOW_GET_SFLOW_PROBABILITY _IOW('O', 20, int)
+
++#define XFLOW_VPORT_ADD _IOR('O', 21, struct xflow_vport_add)
++#define XFLOW_VPORT_MOD _IOR('O', 22, struct xflow_vport_mod)
++#define XFLOW_VPORT_DEL _IO('O', 23)
++#define XFLOW_VPORT_STATS_GET _IOWR('O', 24, struct xflow_vport_stats_req)
++#define XFLOW_VPORT_ETHER_GET _IOWR('O', 25, struct xflow_vport_ether)
++#define XFLOW_VPORT_ETHER_SET _IOW('O', 26, struct xflow_vport_ether)
++#define XFLOW_VPORT_MTU_GET _IOWR('O', 27, struct xflow_vport_mtu)
++#define XFLOW_VPORT_MTU_SET _IOW('O', 28, struct xflow_vport_mtu)
++
+struct xflow_stats {
+ /* Flows. */
+ __u32 n_flows; /* Number of flows in flow table. */
+ __u32 cur_capacity; /* Current flow table capacity. */
+ __u32 max_capacity; /* Maximum expansion of flow table capacity. */
+
+ /* Ports. */
+ __u32 n_ports; /* Current number of ports. */
+ __u32 max_ports; /* Maximum supported number of ports. */
+ __u16 max_groups; /* Maximum number of port groups. */
+ __u16 reserved;
+
+ /* Lookups. */
+ __u64 n_frags; /* Number of dropped IP fragments. */
+ __u64 n_hit; /* Number of flow table matches. */
+ __u64 n_missed; /* Number of flow table misses. */
+ __u64 n_lost; /* Number of misses not sent to userspace. */
+
+ /* Queues. */
+ __u16 max_miss_queue; /* Max length of XFLOWL_MISS queue. */
+ __u16 max_action_queue; /* Max length of XFLOWL_ACTION queue. */
+ __u16 max_sflow_queue; /* Max length of XFLOWL_SFLOW queue. */
+};
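+
+/* Illustrative sketch, not part of this header: userspace retrieves these
+ * statistics with the XFLOW_DP_STATS ioctl on an open datapath file
+ * descriptor. 'dp_fd' and how it is obtained are assumptions here:
+ *
+ *     struct xflow_stats stats;
+ *
+ *     if (ioctl(dp_fd, XFLOW_DP_STATS, &stats) < 0)
+ *         perror("XFLOW_DP_STATS");
+ *     else
+ *         printf("%u flows, %llu hits\n", stats.n_flows,
+ *                (unsigned long long) stats.n_hit);
+ */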
+
+/* Logical ports. */
+#define XFLOWP_LOCAL ((__u16)0)
+#define XFLOWP_NONE ((__u16)-1)
+#define XFLOWP_NORMAL ((__u16)-2)
+
+/* Listening channels. */
+#define _XFLOWL_MISS_NR 0 /* Packet missed in flow table. */
+#define XFLOWL_MISS (1 << _XFLOWL_MISS_NR)
+#define _XFLOWL_ACTION_NR 1 /* Packet output to controller (XFLOWAT_CONTROLLER). */
+#define XFLOWL_ACTION (1 << _XFLOWL_ACTION_NR)
+#define _XFLOWL_SFLOW_NR 2 /* sFlow samples. */
+#define XFLOWL_SFLOW (1 << _XFLOWL_SFLOW_NR)
+#define XFLOWL_ALL (XFLOWL_MISS | XFLOWL_ACTION | XFLOWL_SFLOW)
+
+/**
+ * struct xflow_msg - format of messages read from datapath fd.
+ * @type: One of the %_XFLOWL_* constants.
+ * @length: Total length of message, including this header.
+ * @port: Port that received the packet embedded in this message.
+ * @reserved: Not currently used. Should be set to 0.
+ * @arg: Argument value whose meaning depends on @type.
+ *
+ * For @type == %_XFLOWL_MISS_NR, the header is followed by packet data. The
- int n_ports;
++ * @arg member is the ID (in network byte order) of the tunnel that
++ * encapsulated this packet. It is 0 if the packet was not received on a
++ * tunnel.
++ *
+ * For @type == %_XFLOWL_ACTION_NR, the header is followed by packet data. The
+ * @arg member is copied from the &struct xflow_action_controller that caused
+ * the &struct xflow_msg to be composed.
+ *
+ * For @type == %_XFLOWL_SFLOW_NR, the header is followed by &struct
+ * xflow_sflow_sample_header, then by an array of &union xflow_action (the
+ * number of which is specified in &struct xflow_sflow_sample_header), then by
+ * packet data.
+ */
+struct xflow_msg {
+ __u32 type;
+ __u32 length;
+ __u16 port;
+ __u16 reserved;
+ __u32 arg;
+};
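+
+/* Illustrative sketch, not part of this header: a reader might dispatch on
+ * a message pulled from the datapath fd like this, where 'buf' holds at
+ * least 'msg->length' bytes and 'handle_miss' is a hypothetical consumer:
+ *
+ *     struct xflow_msg *msg = (struct xflow_msg *) buf;
+ *     void *packet = msg + 1;
+ *     size_t packet_len = msg->length - sizeof *msg;
+ *
+ *     if (msg->type == _XFLOWL_MISS_NR)
+ *         handle_miss(msg->port, msg->arg, packet, packet_len);
+ */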
+
+/**
+ * struct xflow_sflow_sample_header - header added to sFlow sampled packet.
+ * @sample_pool: Number of packets that were candidates for sFlow sampling,
+ * regardless of whether they were actually chosen and sent down to userspace.
+ * @n_actions: Number of "union xflow_action"s immediately following this
+ * header.
+ *
+ * This header follows &struct xflow_msg when that structure's @type is
+ * %_XFLOWL_SFLOW_NR, and it is itself followed by an array of &union
+ * xflow_action (the number of which is specified in @n_actions) and then by
+ * packet data.
+ */
+struct xflow_sflow_sample_header {
+ __u32 sample_pool;
+ __u32 n_actions;
+};
+
+#define XFLOW_PORT_INTERNAL (1 << 0) /* This port is simulated. */
+struct xflow_port {
+ char devname[16]; /* IFNAMSIZ */
+ __u16 port;
+ __u16 flags;
+ __u32 reserved2;
+};
+
+struct xflow_portvec {
+ struct xflow_port *ports;
- int n_flows;
++ __u32 n_ports;
+};
+
+struct xflow_port_group {
+ __u16 *ports;
+ __u16 n_ports; /* Number of ports. */
+ __u16 group; /* Group number. */
+};
+
+struct xflow_flow_stats {
+ __u64 n_packets; /* Number of matched packets. */
+ __u64 n_bytes; /* Number of matched bytes. */
+ __u64 used_sec; /* Time last used. */
+ __u32 used_nsec;
+ __u8 tcp_flags;
+ __u8 ip_tos;
+ __u16 error; /* Used by XFLOW_FLOW_GET. */
+};
+
+/*
+ * The datapath protocol adopts the Linux convention for TCI fields: if an
+ * 802.1Q header is present then its TCI value is used verbatim except that the
+ * CFI bit (0x1000) is always set to 1, and all-bits-zero indicates no 802.1Q
+ * header.
+ */
+#define XFLOW_TCI_PRESENT 0x1000 /* CFI bit */
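+/* For example, VLAN ID 10 with priority 3 is encoded as
+ * (3 << 13) | XFLOW_TCI_PRESENT | 10, while a TCI of all-bits-zero means
+ * the packet had no 802.1Q header at all. */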
+
+struct xflow_key {
++ __be32 tun_id; /* Encapsulating tunnel ID. */
+ __be32 nw_src; /* IP source address. */
+ __be32 nw_dst; /* IP destination address. */
+ __u16 in_port; /* Input switch port. */
+ __be16 dl_tci; /* All zeros if 802.1Q header absent,
+ * XFLOW_TCI_PRESENT set if present. */
+ __be16 dl_type; /* Ethernet frame type. */
+ __be16 tp_src; /* TCP/UDP source port. */
+ __be16 tp_dst; /* TCP/UDP destination port. */
+ __u8 dl_src[ETH_ALEN]; /* Ethernet source address. */
+ __u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */
+ __u8 nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
+ __u8 nw_tos; /* IP ToS (DSCP field, 6 bits). */
+};
+
+/* Flags for XFLOW_FLOW. */
+#define XFLOWFF_ZERO_TCP_FLAGS (1 << 0) /* Zero the TCP flags. */
+
+struct xflow_flow {
+ struct xflow_flow_stats stats;
+ struct xflow_key key;
+ union xflow_action *actions;
+ __u32 n_actions;
+ __u32 flags;
+};
+
+/* Flags for XFLOW_FLOW_PUT. */
+#define XFLOWPF_CREATE (1 << 0) /* Allow creating a new flow. */
+#define XFLOWPF_MODIFY (1 << 1) /* Allow modifying an existing flow. */
+#define XFLOWPF_ZERO_STATS (1 << 2) /* Zero the stats of existing flow. */
+
+/* XFLOW_FLOW_PUT argument. */
+struct xflow_flow_put {
+ struct xflow_flow flow;
+ __u32 flags;
+};
+
+struct xflow_flowvec {
+ struct xflow_flow *flows;
- #define XFLOWAT_N_ACTIONS 12
++ __u32 n_flows;
+};
+
+/* Action types. */
+#define XFLOWAT_OUTPUT 0 /* Output to switch port. */
+#define XFLOWAT_OUTPUT_GROUP 1 /* Output to all ports in group. */
+#define XFLOWAT_CONTROLLER 2 /* Send copy to controller. */
+#define XFLOWAT_SET_DL_TCI 3 /* Set the 802.1q VLAN VID and/or PCP. */
+#define XFLOWAT_STRIP_VLAN 4 /* Strip the 802.1q header. */
+#define XFLOWAT_SET_DL_SRC 5 /* Ethernet source address. */
+#define XFLOWAT_SET_DL_DST 6 /* Ethernet destination address. */
+#define XFLOWAT_SET_NW_SRC 7 /* IP source address. */
+#define XFLOWAT_SET_NW_DST 8 /* IP destination address. */
+#define XFLOWAT_SET_NW_TOS 9 /* IP ToS/DSCP field (6 bits). */
+#define XFLOWAT_SET_TP_SRC 10 /* TCP/UDP source port. */
+#define XFLOWAT_SET_TP_DST 11 /* TCP/UDP destination port. */
++#define XFLOWAT_SET_TUNNEL 12 /* Set the encapsulating tunnel ID. */
++#define XFLOWAT_N_ACTIONS 13
+
+struct xflow_action_output {
+ __u16 type; /* XFLOWAT_OUTPUT. */
+ __u16 port; /* Output port. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+struct xflow_action_output_group {
+ __u16 type; /* XFLOWAT_OUTPUT_GROUP. */
+ __u16 group; /* Group number. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+struct xflow_action_controller {
+ __u16 type; /* XFLOWAT_CONTROLLER. */
+ __u16 reserved;
+ __u32 arg; /* Copied to struct xflow_msg 'arg' member. */
+};
+
++struct xflow_action_tunnel {
++ __u16 type; /* XFLOWAT_SET_TUNNEL. */
++ __u16 reserved;
++ __be32 tun_id; /* Tunnel ID. */
++};
++
+/* Action structure for XFLOWAT_SET_DL_TCI. */
+struct xflow_action_dl_tci {
+ __u16 type; /* XFLOWAT_SET_DL_TCI. */
+ __be16 tci; /* New TCI. Bits not in mask must be zero. */
+ __be16 mask; /* 0x0fff to set VID, 0xe000 to set PCP,
+ or 0xefff to set both. */
+ __u16 reserved;
+};
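+
+/* Illustrative sketch, not part of this header: an action that sets only
+ * the VLAN ID to 10, leaving the priority bits untouched, could be built
+ * as follows (note that bits outside 'mask' must be zero in 'tci'):
+ *
+ *     union xflow_action a;
+ *
+ *     memset(&a, 0, sizeof a);
+ *     a.dl_tci.type = XFLOWAT_SET_DL_TCI;
+ *     a.dl_tci.tci = htons(10);
+ *     a.dl_tci.mask = htons(0x0fff);
+ */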
+
+/* Action structure for XFLOWAT_SET_DL_SRC/DST. */
+struct xflow_action_dl_addr {
+ __u16 type; /* XFLOWAT_SET_DL_SRC/DST. */
+ __u8 dl_addr[ETH_ALEN]; /* Ethernet address. */
+};
+
+/* Action structure for XFLOWAT_SET_NW_SRC/DST. */
+struct xflow_action_nw_addr {
+ __u16 type; /* XFLOWAT_SET_NW_SRC/DST. */
+ __u16 reserved;
+ __be32 nw_addr; /* IP address. */
+};
+
+struct xflow_action_nw_tos {
+ __u16 type; /* XFLOWAT_SET_NW_TOS. */
+ __u8 nw_tos; /* IP ToS/DSCP field (6 bits). */
+ __u8 reserved1;
+ __u16 reserved2;
+ __u16 reserved3;
+};
+
+/* Action structure for XFLOWAT_SET_TP_SRC/DST. */
+struct xflow_action_tp_port {
+ __u16 type; /* XFLOWAT_SET_TP_SRC/DST. */
+ __be16 tp_port; /* TCP/UDP port. */
+ __u16 reserved1;
+ __u16 reserved2;
+};
+
+union xflow_action {
+ __u16 type;
+ struct xflow_action_output output;
+ struct xflow_action_output_group output_group;
+ struct xflow_action_controller controller;
++ struct xflow_action_tunnel tunnel;
+ struct xflow_action_dl_tci dl_tci;
+ struct xflow_action_dl_addr dl_addr;
+ struct xflow_action_nw_addr nw_addr;
+ struct xflow_action_nw_tos nw_tos;
+ struct xflow_action_tp_port tp_port;
+};
+
+struct xflow_execute {
+ __u16 in_port;
+ __u16 reserved1;
+ __u32 reserved2;
+
+ union xflow_action *actions;
+ __u32 n_actions;
+
+ const void *data;
+ __u32 length;
+};
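A sketch tying the pieces together: an execute request (the ioctl name XFLOW_EXECUTE and the locals fd, in_port, packet, and packet_len are assumptions; the structures are the ones above) that stamps a tunnel ID on a packet and outputs it on port 2:

    union xflow_action actions[2];
    struct xflow_execute execute;

    memset(actions, 0, sizeof actions);
    actions[0].tunnel.type = XFLOWAT_SET_TUNNEL;
    actions[0].tunnel.tun_id = htonl(5);    /* tun_id is big-endian. */
    actions[1].output.type = XFLOWAT_OUTPUT;
    actions[1].output.port = 2;

    memset(&execute, 0, sizeof execute);
    execute.in_port = in_port;
    execute.actions = actions;
    execute.n_actions = 2;
    execute.data = packet;                  /* Complete Ethernet frame. */
    execute.length = packet_len;
    /* error = ioctl(fd, XFLOW_EXECUTE, &execute); */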
+
++#define VPORT_TYPE_SIZE 16
++struct xflow_vport_add {
++ char port_type[VPORT_TYPE_SIZE];
++ char devname[16]; /* IFNAMSIZ */
++ void *config;
++};
++
++struct xflow_vport_mod {
++ char devname[16]; /* IFNAMSIZ */
++ void *config;
++};
++
++struct xflow_vport_stats {
++ __u64 rx_packets;
++ __u64 tx_packets;
++ __u64 rx_bytes;
++ __u64 tx_bytes;
++ __u64 rx_dropped;
++ __u64 tx_dropped;
++ __u64 rx_errors;
++ __u64 tx_errors;
++ __u64 rx_frame_err;
++ __u64 rx_over_err;
++ __u64 rx_crc_err;
++ __u64 collisions;
++};
++
++struct xflow_vport_stats_req {
++ char devname[16]; /* IFNAMSIZ */
++ struct xflow_vport_stats stats;
++};
++
++struct xflow_vport_ether {
++ char devname[16]; /* IFNAMSIZ */
++ unsigned char ether_addr[ETH_ALEN];
++};
++
++struct xflow_vport_mtu {
++ char devname[16]; /* IFNAMSIZ */
++ __u16 mtu;
++};
++
+/* Values below this cutoff are 802.3 packets and the two bytes
+ * following the MAC addresses are used as a frame length. Otherwise,
+ * the two bytes are used as the Ethernet type.
+ */
+#define XFLOW_DL_TYPE_ETH2_CUTOFF 0x0600
+
+/* Value of dl_type to indicate that the frame does not include an
+ * Ethernet type.
+ */
+#define XFLOW_DL_TYPE_NOT_ETH_TYPE 0x05ff
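In other words (a hypothetical sketch, with the two bytes already in host byte order):

    /* Map the two bytes after the MAC addresses to a dl_type value. */
    static inline __u16
    xflow_dl_type(__u16 eth_type_or_len)
    {
        return (eth_type_or_len >= XFLOW_DL_TYPE_ETH2_CUTOFF
                ? eth_type_or_len               /* Ethernet II: a real type. */
                : XFLOW_DL_TYPE_NOT_ETH_TYPE);  /* 802.3: a frame length. */
    }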
+
+#endif /* openvswitch/xflow.h */
lib/dhparams.h
EXTRA_DIST += \
- lib/common.man \
lib/common-syn.man \
- lib/daemon.man \
+ lib/common.man \
lib/daemon-syn.man \
- lib/dpif.man \
+ lib/daemon.man \
lib/leak-checker.man \
- lib/ssl-bootstrap.man \
lib/ssl-bootstrap-syn.man \
+ lib/ssl-bootstrap.man \
lib/ssl-peer-ca-cert.man \
- lib/ssl.man \
lib/ssl-syn.man \
+ lib/ssl.man \
+ lib/unixctl.man \
+ lib/unixctl-syn.man \
lib/vconn-active.man \
lib/vconn-passive.man \
- lib/vlog-unixctl.man \
lib/vlog-syn.man \
- lib/vlog.man
-
+ lib/vlog-unixctl.man \
+ lib/vlog.man \
+ lib/xfif.man
lib/dirs.c: Makefile
($(ro_c) && \
echo 'const char ovs_pkgdatadir[] = "$(pkgdatadir)";' && \
static bool rules_match_2wild(const struct cls_rule *wild1,
const struct cls_rule *wild2, int field_idx);
--/* Converts the flow in 'flow' into a cls_rule in 'rule', with the given
-- * 'wildcards' and 'priority'.*/
++/* Converts the flow in 'flow' into a cls_rule in 'rule'. */
void
- cls_rule_from_flow(struct cls_rule *rule, const flow_t *flow)
-cls_rule_from_flow(const flow_t *flow, uint32_t wildcards,
- unsigned int priority, struct cls_rule *rule)
++cls_rule_from_flow(const flow_t *flow, struct cls_rule *rule)
{
- assert(!flow->reserved[0] && !flow->reserved[1] && !flow->reserved[2]);
rule->flow = *flow;
- flow_wildcards_init(&rule->wc, wildcards);
- rule->priority = priority;
- rule->table_idx = table_idx_from_wildcards(rule->wc.wildcards);
+ if (!rule->flow.wildcards && rule->flow.priority < UINT16_MAX) {
+ rule->flow.priority = UINT16_MAX;
+ }
+ flow_wildcards_init(&rule->wc, flow->wildcards);
+ rule->table_idx = table_idx_from_wildcards(flow->wildcards);
}
/* Converts the ofp_match in 'match' into a cls_rule in 'rule', with the given
- * 'priority'. */
+ * 'priority'. If 'tun_id_from_cookie' is set then the upper 32 bits of
+ * 'cookie' are stored in the rule as the tunnel ID. */
void
- cls_rule_from_match(struct cls_rule *rule, unsigned int priority,
- const struct ofp_match *match)
+ cls_rule_from_match(const struct ofp_match *match, unsigned int priority,
+ bool tun_id_from_cookie, uint64_t cookie,
+ struct cls_rule *rule)
{
- flow_from_match(&rule->flow,
- rule->flow.wildcards ? priority : UINT16_MAX,
- match);
- uint32_t wildcards;
- flow_from_match(match, tun_id_from_cookie, cookie, &rule->flow, &wildcards);
- flow_wildcards_init(&rule->wc, wildcards);
- rule->priority = rule->wc.wildcards ? priority : UINT16_MAX;
- rule->table_idx = table_idx_from_wildcards(rule->wc.wildcards);
++ flow_from_match(match, priority, tun_id_from_cookie, cookie, &rule->flow);
++ if (!rule->flow.wildcards && rule->flow.priority < UINT16_MAX) {
++ rule->flow.priority = UINT16_MAX;
++ }
+ flow_wildcards_init(&rule->wc, rule->flow.wildcards);
+ rule->table_idx = table_idx_from_wildcards(rule->flow.wildcards);
}
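For example, with tun_id_from_cookie set, a cookie that ntohll() decodes to 0x0000000500000000 yields flow->tun_id == htonl(5): ntohll() brings the cookie into host order, the right shift keeps its upper 32 bits, and htonl() converts back to the network byte order that the tunnel ID uses (see flow_from_match() below).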
/* Converts 'rule' to a string and returns the string. The caller must free
struct cls_rule target;
int i;
- cls_rule_from_flow(&target, flow);
- cls_rule_from_flow(flow, 0, 0, &target);
++ cls_rule_from_flow(flow, &target);
for (i = 0; i < CLS_N_FIELDS; i++) {
struct cls_rule *rule = search_table(&cls->tables[i], i, &target);
- if (rule && (!best || rule->priority > best->priority)) {
+ if (rule && (!best || rule->flow.priority > best->flow.priority)) {
best = rule;
}
}
return search_exact_table(cls, flow_hash(target, 0), target);
}
- assert(target->wildcards == (target->wildcards & OFPFW_ALL));
- assert(wildcards == (wildcards & OVSFW_ALL));
- table_idx = table_idx_from_wildcards(wildcards);
++ assert(target->wildcards == (target->wildcards & OVSFW_ALL));
+ table_idx = table_idx_from_wildcards(target->wildcards);
hash = hash_fields(target, table_idx);
HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash,
&cls->tables[table_idx]) {
true : false;
}
- cls_rule_from_flow(&target_rule, target);
- cls_rule_from_flow(target, wildcards, priority, &target_rule);
++ cls_rule_from_flow(target, &target_rule);
for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
struct cls_bucket *bucket;
* wildcards and an exact-match rule will never be in the same table. */
void
classifier_for_each_match(const struct classifier *cls,
- const struct cls_rule *target,
+ const flow_t *target_flow,
int include, cls_cb_func *callback, void *aux)
{
- cls_rule_from_flow(&target, target_flow);
+ struct cls_rule target;
+
++ cls_rule_from_flow(target_flow, &target);
if (include & CLS_INC_WILD) {
const struct hmap *table;
#include "flow.h"
#include "hmap.h"
#include "list.h"
+ #include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Number of bytes of fields in a rule. */
- #define CLS_N_BYTES 31
+ #define CLS_N_BYTES 37
/* Fields in a rule.
*
unsigned int table_idx; /* Index into struct classifier 'tables'. */
};
- void cls_rule_from_flow(struct cls_rule *, const flow_t *);
- void cls_rule_from_match(struct cls_rule *, unsigned int priority,
- const struct ofp_match *);
-void cls_rule_from_flow(const flow_t *, uint32_t wildcards,
- unsigned int priority, struct cls_rule *);
++void cls_rule_from_flow(const flow_t *, struct cls_rule *);
+ void cls_rule_from_match(const struct ofp_match *, unsigned int priority,
+ bool tun_id_from_cookie, uint64_t cookie,
+ struct cls_rule *);
char *cls_rule_to_string(const struct cls_rule *);
void cls_rule_print(const struct cls_rule *);
void cls_rule_moved(struct classifier *,
return ofpbuf_try_pull(packet, VLAN_HEADER_LEN);
}
- /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. */
+ /* Returns 1 if 'packet' is an IP fragment, 0 otherwise.
+ * 'tun_id' is in network byte order, while 'in_port' is in host byte order.
- * These byte orders are the same as they are in struct odp_flow_key. */
++ * These byte orders are the same as they are in struct xflow_key. */
int
- flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow)
+ flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port,
+ flow_t *flow)
{
struct ofpbuf b = *packet;
struct eth_header *eth;
stats->n_packets = 1;
}
--/* Extract 'flow' with 'wildcards' into the OpenFlow match structure
-- * 'match'. */
++/* Extract 'flow' into the OpenFlow match structure 'match'. */
void
- flow_to_match(const flow_t *flow, struct ofp_match *match)
-flow_to_match(const flow_t *flow, uint32_t wildcards, bool tun_id_from_cookie,
++flow_to_match(const flow_t *flow, bool tun_id_from_cookie,
+ struct ofp_match *match)
{
- match->wildcards = htonl(flow->wildcards);
- if (!tun_id_from_cookie) {
- wildcards &= OFPFW_ALL;
- }
- match->wildcards = htonl(wildcards);
++ uint32_t wildcard_mask = tun_id_from_cookie ? OVSFW_ALL : OFPFW_ALL;
++ match->wildcards = htonl(flow->wildcards & wildcard_mask);
+
- match->in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL
+ match->in_port = htons(flow->in_port == XFLOWP_LOCAL ? OFPP_LOCAL
: flow->in_port);
match->dl_vlan = flow->dl_vlan;
match->dl_vlan_pcp = flow->dl_vlan_pcp;
}
void
- flow_from_match(flow_t *flow, unsigned int priority,
- const struct ofp_match *match)
-flow_from_match(const struct ofp_match *match, bool tun_id_from_cookie,
- uint64_t cookie, flow_t *flow, uint32_t *flow_wildcards)
++flow_from_match(const struct ofp_match *match, uint32_t priority,
++ bool tun_id_from_cookie, uint64_t cookie, flow_t *flow)
{
- uint32_t wildcards = ntohl(match->wildcards);
-
+ flow->wildcards = ntohl(match->wildcards);
+ flow->priority = priority;
flow->nw_src = match->nw_src;
flow->nw_dst = match->nw_dst;
- if (tun_id_from_cookie && !(wildcards & NXFW_TUN_ID)) {
++ if (tun_id_from_cookie && !(flow->wildcards & NXFW_TUN_ID)) {
+ flow->tun_id = htonl(ntohll(cookie) >> 32);
+ } else {
- wildcards |= NXFW_TUN_ID;
++ flow->wildcards |= NXFW_TUN_ID;
+ flow->tun_id = 0;
+ }
- flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? ODPP_LOCAL
+ flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? XFLOWP_LOCAL
: ntohs(match->in_port));
flow->dl_vlan = match->dl_vlan;
flow->dl_vlan_pcp = match->dl_vlan_pcp;
void
flow_format(struct ds *ds, const flow_t *flow)
{
- ds_put_format(ds, "tunnel%08"PRIx32":in_port%04"PRIx16
+ ds_put_format(ds, "wild%08"PRIx32" pri%"PRIu32" "
- "in_port%04x:vlan%d:pcp%d mac"ETH_ADDR_FMT
- "->"ETH_ADDR_FMT" type%04x proto%"PRId8" tos%"PRIu8
- " ip"IP_FMT"->"IP_FMT" port%d->%d",
- flow->wildcards, flow->priority,
- flow->in_port, ntohs(flow->dl_vlan), flow->dl_vlan_pcp,
- ETH_ADDR_ARGS(flow->dl_src), ETH_ADDR_ARGS(flow->dl_dst),
- ntohs(flow->dl_type), flow->nw_proto, flow->nw_tos,
- IP_ARGS(&flow->nw_src), IP_ARGS(&flow->nw_dst),
- ntohs(flow->tp_src), ntohs(flow->tp_dst));
++ "tunnel%08"PRIx32":in_port%04"PRIx16
+ ":vlan%"PRIu16":pcp%"PRIu8
+ " mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT
+ " type%04"PRIx16
+ " proto%"PRIu8
+ " tos%"PRIu8
+ " ip"IP_FMT"->"IP_FMT
+ " port%"PRIu16"->%"PRIu16,
++ flow->wildcards,
++ flow->priority,
+ ntohl(flow->tun_id),
+ flow->in_port,
+ ntohs(flow->dl_vlan),
+ flow->dl_vlan_pcp,
+ ETH_ADDR_ARGS(flow->dl_src),
+ ETH_ADDR_ARGS(flow->dl_dst),
+ ntohs(flow->dl_type),
+ flow->nw_proto,
+ flow->nw_tos,
+ IP_ARGS(&flow->nw_src),
+ IP_ARGS(&flow->nw_dst),
+ ntohs(flow->tp_src),
+ ntohs(flow->tp_dst));
}
void
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
- #include "hash.h"
+ #include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
-#include "openvswitch/datapath-protocol.h"
+ #include "hash.h"
+#include "openvswitch/xflow.h"
#include "util.h"
struct ds;
struct ofp_match;
struct ofpbuf;
-typedef struct odp_flow_key flow_t;
+typedef struct flow flow_t;
+struct flow {
+ uint32_t wildcards; /* Wildcards. */
+ uint32_t priority; /* Priority. */
++ uint32_t tun_id; /* Encapsulating tunnel ID. */
+ uint32_t nw_src; /* IP source address. */
+ uint32_t nw_dst; /* IP destination address. */
+ uint16_t in_port; /* Input switch port. */
+ uint16_t dl_vlan; /* Input VLAN. */
+ uint16_t dl_type; /* Ethernet frame type. */
+ uint16_t tp_src; /* TCP/UDP source port. */
+ uint16_t tp_dst; /* TCP/UDP destination port. */
+ uint8_t dl_src[ETH_ALEN]; /* Ethernet source address. */
+ uint8_t dl_dst[ETH_ALEN]; /* Ethernet destination address. */
+ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
+ uint8_t dl_vlan_pcp; /* Input VLAN priority. */
+ uint8_t nw_tos; /* IP ToS (DSCP field, 6 bits). */
+};
+
+/* Assert that there are FLOW_SIG_SIZE bytes of significant data in "struct
+ * flow", followed by FLOW_PAD_SIZE bytes of padding. */
- #define FLOW_SIG_SIZE 41
++#define FLOW_SIG_SIZE 45
+#define FLOW_PAD_SIZE 3
+BUILD_ASSERT_DECL(offsetof(struct flow, nw_tos) == FLOW_SIG_SIZE - 1);
+BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->nw_tos) == 1);
+BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE);
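The 45-byte figure is just the sum of the members above: five 32-bit fields (wildcards, priority, tun_id, nw_src, nw_dst) + five 16-bit fields (in_port, dl_vlan, dl_type, tp_src, tp_dst) + two 6-byte Ethernet addresses + three 1-byte fields (nw_proto, dl_vlan_pcp, nw_tos) = 20 + 10 + 12 + 3 = 45, which the compiler then pads to 48.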
- int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *);
+ int flow_extract(struct ofpbuf *, uint32_t tun_id, uint16_t in_port, flow_t *);
void flow_extract_stats(const flow_t *flow, struct ofpbuf *packet,
- struct odp_flow_stats *stats);
-void flow_to_match(const flow_t *, uint32_t wildcards, bool tun_id_cookie,
- struct ofp_match *);
-void flow_from_match(const struct ofp_match *, bool tun_id_from_cookie,
- uint64_t cookie, flow_t *, uint32_t *wildcards);
+ struct xflow_flow_stats *stats);
- void flow_to_match(const flow_t *, struct ofp_match *);
- void flow_from_match(flow_t *, unsigned int priority,
- const struct ofp_match *);
++void flow_to_match(const flow_t *,
++ bool tun_id_from_cookie, struct ofp_match *);
++void flow_from_match(const struct ofp_match *, uint32_t priority,
++ bool tun_id_from_cookie, uint64_t cookie, flow_t *);
char *flow_to_string(const flow_t *);
void flow_format(struct ds *, const flow_t *);
void flow_print(FILE *, const flow_t *);
static inline void
flow_wildcards_init(struct flow_wildcards *wc, uint32_t wildcards)
{
- wildcards &= OFPFW_ALL;
- wc->wildcards = wildcards & OVSFW_ALL;
- wc->nw_src_mask = flow_nw_bits_to_mask(wc->wildcards, OFPFW_NW_SRC_SHIFT);
- wc->nw_dst_mask = flow_nw_bits_to_mask(wc->wildcards, OFPFW_NW_DST_SHIFT);
++ wildcards &= OVSFW_ALL;
+ wc->nw_src_mask = flow_nw_bits_to_mask(wildcards, OFPFW_NW_SRC_SHIFT);
+ wc->nw_dst_mask = flow_nw_bits_to_mask(wildcards, OFPFW_NW_DST_SHIFT);
}
#endif /* flow.h */
--- /dev/null
-#include "openvswitch/datapath-protocol.h"
+ /*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #include <config.h>
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <net/if.h>
+ #include <sys/ioctl.h>
+
+ #include "list.h"
+ #include "netdev-provider.h"
+ #include "openflow/openflow.h"
+ #include "openvswitch/gre.h"
++#include "openvswitch/xflow.h"
+ #include "packets.h"
+ #include "shash.h"
+ #include "socket-util.h"
+
+ #define THIS_MODULE VLM_netdev_gre
+ #include "vlog.h"
+
+ struct netdev_dev_gre {
+ struct netdev_dev netdev_dev;
+ };
+
+ struct netdev_gre {
+ struct netdev netdev;
+ };
+
+ struct netdev_gre_notifier {
+ struct netdev_notifier notifier;
+ struct list node;
+ };
+
+ static int ioctl_fd = -1;
+ static struct shash netdev_gre_notifiers =
+ SHASH_INITIALIZER(&netdev_gre_notifiers);
+
+ static void poll_notify(const struct netdev_gre *netdev);
+
+ static struct netdev_dev_gre *
+ netdev_dev_gre_cast(const struct netdev_dev *netdev_dev)
+ {
+ netdev_dev_assert_class(netdev_dev, &netdev_gre_class);
+ return CONTAINER_OF(netdev_dev, struct netdev_dev_gre, netdev_dev);
+ }
+
+ static struct netdev_gre *
+ netdev_gre_cast(const struct netdev *netdev)
+ {
+ netdev_assert_class(netdev, &netdev_gre_class);
+ return CONTAINER_OF(netdev, struct netdev_gre, netdev);
+ }
+
+ static int
+ netdev_gre_init(void)
+ {
+ static int status = -1;
+ if (status < 0) {
+ ioctl_fd = open("/dev/net/dp0", O_RDONLY | O_NONBLOCK);
+ status = ioctl_fd >= 0 ? 0 : errno;
+ if (status) {
+ VLOG_ERR("failed to open ioctl fd: %s", strerror(status));
+ }
+ }
+ return status;
+ }
+
+ static int
+ do_ioctl(int cmd, void *arg)
+ {
+ return ioctl(ioctl_fd, cmd, arg) ? errno : 0;
+ }
+
+ static int
+ parse_config(const char *name, const struct shash *args,
+ struct gre_port_config *config)
+ {
+ struct shash_node *node;
+
+ memset(config, 0, sizeof *config);
+
+ config->flags |= GRE_F_IN_CSUM;
+ config->flags |= GRE_F_OUT_CSUM;
+ config->flags |= GRE_F_PMTUD;
+
+ SHASH_FOR_EACH (node, args) {
+ if (!strcmp(node->name, "remote_ip")) {
+ struct in_addr in_addr;
+ if (lookup_ip(node->data, &in_addr)) {
+ VLOG_WARN("%s: bad gre 'remote_ip'", name);
+ } else {
+ config->daddr = in_addr.s_addr;
+ }
+ } else if (!strcmp(node->name, "local_ip")) {
+ struct in_addr in_addr;
+ if (lookup_ip(node->data, &in_addr)) {
+ VLOG_WARN("%s: bad gre 'local_ip'", name);
+ } else {
+ config->saddr = in_addr.s_addr;
+ }
+ } else if (!strcmp(node->name, "key")) {
+ if (!strcmp(node->data, "flow")) {
+ config->flags |= GRE_F_IN_KEY_MATCH;
+ config->flags |= GRE_F_OUT_KEY_ACTION;
+ } else {
+ config->out_key = config->in_key = htonl(atoi(node->data));
+ }
+ } else if (!strcmp(node->name, "in_key")) {
+ if (!strcmp(node->data, "flow")) {
+ config->flags |= GRE_F_IN_KEY_MATCH;
+ } else {
+ config->in_key = htonl(atoi(node->data));
+ }
+ } else if (!strcmp(node->name, "out_key")) {
+ if (!strcmp(node->data, "flow")) {
+ config->flags |= GRE_F_OUT_KEY_ACTION;
+ } else {
+ config->out_key = htonl(atoi(node->data));
+ }
+ } else if (!strcmp(node->name, "tos")) {
+ if (!strcmp(node->data, "inherit")) {
+ config->flags |= GRE_F_TOS_INHERIT;
+ } else {
+ config->tos = atoi(node->data);
+ }
+ } else if (!strcmp(node->name, "ttl")) {
+ if (!strcmp(node->data, "inherit")) {
+ config->flags |= GRE_F_TTL_INHERIT;
+ } else {
+ config->ttl = atoi(node->data);
+ }
+ } else if (!strcmp(node->name, "csum")) {
+ if (!strcmp(node->data, "false")) {
+ config->flags &= ~GRE_F_IN_CSUM;
+ config->flags &= ~GRE_F_OUT_CSUM;
+ }
+ } else if (!strcmp(node->name, "pmtud")) {
+ if (!strcmp(node->data, "false")) {
+ config->flags &= ~GRE_F_PMTUD;
+ }
+ } else {
+ VLOG_WARN("%s: unknown gre argument '%s'", name, node->name);
+ }
+ }
+
+ if (!config->daddr) {
+ VLOG_WARN("%s: gre type requires valid 'remote_ip' argument", name);
+ return EINVAL;
+ }
+
+ return 0;
+ }
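A sketch of a caller (argument names are the ones the parser above accepts; the shash stores string values, so plain char arrays are used to sidestep const issues):

    struct shash args = SHASH_INITIALIZER(&args);
    struct gre_port_config config;
    char remote_ip[] = "192.168.1.1", key[] = "flow", csum[] = "false";

    shash_add(&args, "remote_ip", remote_ip);   /* Required. */
    shash_add(&args, "key", key);               /* Per-flow in/out keys. */
    shash_add(&args, "csum", csum);             /* Disable GRE checksums. */
    if (!parse_config("gre0", &args, &config)) {
        /* config.daddr, config.flags, etc. are now filled in. */
    }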
+
+ static int
+ netdev_gre_create(const char *name, const char *type OVS_UNUSED,
+ const struct shash *args, struct netdev_dev **netdev_devp)
+ {
+ int err;
- struct odp_vport_add ova;
++ struct xflow_vport_add ova;
+ struct gre_port_config port_config;
+ struct netdev_dev_gre *netdev_dev;
+
+ ovs_strlcpy(ova.port_type, "gre", sizeof ova.port_type);
+ ovs_strlcpy(ova.devname, name, sizeof ova.devname);
+ ova.config = &port_config;
+
+ err = parse_config(name, args, &port_config);
+ if (err) {
+ return err;
+ }
+
- err = do_ioctl(ODP_VPORT_ADD, &ova);
++ err = do_ioctl(XFLOW_VPORT_ADD, &ova);
+ if (err == EEXIST) {
+ VLOG_WARN("%s: destroying existing device", name);
+
- err = do_ioctl(ODP_VPORT_DEL, ova.devname);
++ err = do_ioctl(XFLOW_VPORT_DEL, ova.devname);
+ if (err) {
+ return err;
+ }
+
- err = do_ioctl(ODP_VPORT_ADD, &ova);
++ err = do_ioctl(XFLOW_VPORT_ADD, &ova);
+ }
+
+ if (err) {
+ return err;
+ }
+
+ netdev_dev = xmalloc(sizeof *netdev_dev);
+ netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_gre_class);
+
+ *netdev_devp = &netdev_dev->netdev_dev;
+ return 0;
+ }
+
+ static int
+ netdev_gre_reconfigure(struct netdev_dev *netdev_dev_, const struct shash *args)
+ {
+ const char *name = netdev_dev_get_name(netdev_dev_);
- struct odp_vport_mod ovm;
++ struct xflow_vport_mod ovm;
+ struct gre_port_config port_config;
+ int err;
+
+ ovs_strlcpy(ovm.devname, name, sizeof ovm.devname);
+ ovm.config = &port_config;
+
+ err = parse_config(name, args, &port_config);
+ if (err) {
+ return err;
+ }
+
- return do_ioctl(ODP_VPORT_MOD, &ovm);
++ return do_ioctl(XFLOW_VPORT_MOD, &ovm);
+ }
+
+ static void
+ netdev_gre_destroy(struct netdev_dev *netdev_dev_)
+ {
+ struct netdev_dev_gre *netdev_dev = netdev_dev_gre_cast(netdev_dev_);
+
- do_ioctl(ODP_VPORT_DEL, (char *)netdev_dev_get_name(netdev_dev_));
++ do_ioctl(XFLOW_VPORT_DEL, (char *)netdev_dev_get_name(netdev_dev_));
+ free(netdev_dev);
+ }
+
+ static int
+ netdev_gre_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
+ struct netdev **netdevp)
+ {
+ struct netdev_gre *netdev;
+
+ netdev = xmalloc(sizeof *netdev);
+ netdev_init(&netdev->netdev, netdev_dev_);
+
+ *netdevp = &netdev->netdev;
+ return 0;
+ }
+
+ static void
+ netdev_gre_close(struct netdev *netdev_)
+ {
+ struct netdev_gre *netdev = netdev_gre_cast(netdev_);
+ free(netdev);
+ }
+
+ static int
+ netdev_gre_set_etheraddr(struct netdev *netdev_,
+ const uint8_t mac[ETH_ADDR_LEN])
+ {
+ struct netdev_gre *netdev = netdev_gre_cast(netdev_);
- struct odp_vport_ether vport_ether;
++ struct xflow_vport_ether vport_ether;
+ int err;
+
+ ovs_strlcpy(vport_ether.devname, netdev_get_name(netdev_),
+ sizeof vport_ether.devname);
+
+ memcpy(vport_ether.ether_addr, mac, ETH_ADDR_LEN);
+
- err = ioctl(ioctl_fd, ODP_VPORT_ETHER_SET, &vport_ether);
++ err = ioctl(ioctl_fd, XFLOW_VPORT_ETHER_SET, &vport_ether);
+ if (err) {
+ return err;
+ }
+
+ poll_notify(netdev);
+ return 0;
+ }
+
+ static int
+ netdev_gre_get_etheraddr(const struct netdev *netdev_,
+ uint8_t mac[ETH_ADDR_LEN])
+ {
- struct odp_vport_ether vport_ether;
++ struct xflow_vport_ether vport_ether;
+ int err;
+
+ ovs_strlcpy(vport_ether.devname, netdev_get_name(netdev_),
+ sizeof vport_ether.devname);
+
- err = ioctl(ioctl_fd, ODP_VPORT_ETHER_GET, &vport_ether);
++ err = ioctl(ioctl_fd, XFLOW_VPORT_ETHER_GET, &vport_ether);
+ if (err) {
+ return err;
+ }
+
+ memcpy(mac, vport_ether.ether_addr, ETH_ADDR_LEN);
+ return 0;
+ }
+
+ static int
+ netdev_gre_get_mtu(const struct netdev *netdev_, int *mtup)
+ {
- struct odp_vport_mtu vport_mtu;
++ struct xflow_vport_mtu vport_mtu;
+ int err;
+
+ ovs_strlcpy(vport_mtu.devname, netdev_get_name(netdev_),
+ sizeof vport_mtu.devname);
+
- err = ioctl(ioctl_fd, ODP_VPORT_MTU_GET, &vport_mtu);
++ err = ioctl(ioctl_fd, XFLOW_VPORT_MTU_GET, &vport_mtu);
+ if (err) {
+ return err;
+ }
+
+ *mtup = vport_mtu.mtu;
+ return 0;
+ }
+
+ static int
+ netdev_gre_get_carrier(const struct netdev *netdev OVS_UNUSED, bool *carrier)
+ {
+ *carrier = true;
+ return 0;
+ }
+
+ static int
+ netdev_gre_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
+ {
+ const char *name = netdev_get_name(netdev_);
- struct odp_vport_stats_req ovsr;
++ struct xflow_vport_stats_req ovsr;
+ int err;
+
+ ovs_strlcpy(ovsr.devname, name, sizeof ovsr.devname);
- err = do_ioctl(ODP_VPORT_STATS_GET, &ovsr);
++ err = do_ioctl(XFLOW_VPORT_STATS_GET, &ovsr);
+ if (err) {
+ return err;
+ }
+
+ stats->rx_packets = ovsr.stats.rx_packets;
+ stats->tx_packets = ovsr.stats.tx_packets;
+ stats->rx_bytes = ovsr.stats.rx_bytes;
+ stats->tx_bytes = ovsr.stats.tx_bytes;
+ stats->rx_errors = ovsr.stats.rx_errors;
+ stats->tx_errors = ovsr.stats.tx_errors;
+ stats->rx_dropped = ovsr.stats.rx_dropped;
+ stats->tx_dropped = ovsr.stats.tx_dropped;
+ stats->multicast = UINT64_MAX;
+ stats->collisions = ovsr.stats.collisions;
+ stats->rx_length_errors = UINT64_MAX;
+ stats->rx_over_errors = ovsr.stats.rx_over_err;
+ stats->rx_crc_errors = ovsr.stats.rx_crc_err;
+ stats->rx_frame_errors = ovsr.stats.rx_frame_err;
+ stats->rx_fifo_errors = UINT64_MAX;
+ stats->rx_missed_errors = UINT64_MAX;
+ stats->tx_aborted_errors = UINT64_MAX;
+ stats->tx_carrier_errors = UINT64_MAX;
+ stats->tx_fifo_errors = UINT64_MAX;
+ stats->tx_heartbeat_errors = UINT64_MAX;
+ stats->tx_window_errors = UINT64_MAX;
+
+ return 0;
+ }
+
+ static int
+ netdev_gre_update_flags(struct netdev *netdev OVS_UNUSED,
+ enum netdev_flags off, enum netdev_flags on OVS_UNUSED,
+ enum netdev_flags *old_flagsp)
+ {
+ if (off & (NETDEV_UP | NETDEV_PROMISC)) {
+ return EOPNOTSUPP;
+ }
+
+ *old_flagsp = NETDEV_UP | NETDEV_PROMISC;
+ return 0;
+ }
+
+ static int
+ netdev_gre_poll_add(struct netdev *netdev, void (*cb)(struct netdev_notifier *),
+ void *aux, struct netdev_notifier **notifierp)
+ {
+ const char *netdev_name = netdev_get_name(netdev);
+ struct netdev_gre_notifier *notifier;
+ struct list *list;
+
+ list = shash_find_data(&netdev_gre_notifiers, netdev_name);
+ if (!list) {
+ list = xmalloc(sizeof *list);
+ list_init(list);
+ shash_add(&netdev_gre_notifiers, netdev_name, list);
+ }
+
+ notifier = xmalloc(sizeof *notifier);
+ netdev_notifier_init(&notifier->notifier, netdev, cb, aux);
+ list_push_back(list, &notifier->node);
+
+ *notifierp = &notifier->notifier;
+ return 0;
+ }
+
+ static void
+ netdev_gre_poll_remove(struct netdev_notifier *notifier_)
+ {
+ struct netdev_gre_notifier *notifier =
+ CONTAINER_OF(notifier_, struct netdev_gre_notifier, notifier);
+ struct list *list;
+
+ list = list_remove(¬ifier->node);
+ if (list_is_empty(list)) {
+ const char *netdev_name = netdev_get_name(notifier_->netdev);
+ shash_delete(&netdev_gre_notifiers,
+ shash_find(&netdev_gre_notifiers, netdev_name));
+ free(list);
+ }
+ free(notifier);
+ }
+
+ static void
+ poll_notify(const struct netdev_gre *netdev)
+ {
+ struct list *list = shash_find_data(&netdev_gre_notifiers,
+ netdev_get_name(&netdev->netdev));
+
+ if (list) {
+ struct netdev_gre_notifier *notifier;
+
+ LIST_FOR_EACH (notifier, struct netdev_gre_notifier, node, list) {
+ struct netdev_notifier *n = &notifier->notifier;
+ n->cb(n);
+ }
+ }
+ }
+
+ const struct netdev_class netdev_gre_class = {
+ "gre",
+
+ netdev_gre_init,
+ NULL, /* run */
+ NULL, /* wait */
+
+ netdev_gre_create,
+ netdev_gre_destroy,
+ netdev_gre_reconfigure,
+
+ netdev_gre_open,
+ netdev_gre_close,
+
+ NULL, /* enumerate */
+
+ NULL, /* recv */
+ NULL, /* recv_wait */
+ NULL, /* drain */
+
+ NULL, /* send */
+ NULL, /* send_wait */
+
+ netdev_gre_set_etheraddr,
+ netdev_gre_get_etheraddr,
+ netdev_gre_get_mtu,
+ NULL, /* get_ifindex */
+ netdev_gre_get_carrier,
+ netdev_gre_get_stats,
+ NULL, /* set_stats */
+
+ NULL, /* get_features */
+ NULL, /* set_advertisements */
+ NULL, /* get_vlan_vid */
+ NULL, /* set_policing */
+
+ NULL, /* get_in4 */
+ NULL, /* set_in4 */
+ NULL, /* get_in6 */
+ NULL, /* add_router */
+ NULL, /* get_next_hop */
+ NULL, /* arp_lookup */
+
+ netdev_gre_update_flags,
+
+ netdev_gre_poll_add,
+ netdev_gre_poll_remove,
+ };
struct ofp_match match;
packet.data = (void *) op->data;
packet.size = data_len;
- flow_extract(&packet, ntohs(op->in_port), &flow);
- flow_to_match(&flow, &match);
+ flow_extract(&packet, 0, ntohs(op->in_port), &flow);
- flow_to_match(&flow, 0, false, &match);
++ flow_to_match(&flow, false, &match);
ofp_print_match(string, &match, verbosity);
ds_put_char(string, '\n');
}
VLOG_MODULE(terminal)
VLOG_MODULE(timeval)
VLOG_MODULE(tty)
-VLOG_MODULE(socket_util)
-VLOG_MODULE(switchui)
VLOG_MODULE(unixctl)
VLOG_MODULE(util)
-VLOG_MODULE(vconn_tcp)
+VLOG_MODULE(vconn)
VLOG_MODULE(vconn_ssl)
VLOG_MODULE(vconn_stream)
+VLOG_MODULE(vconn_tcp)
VLOG_MODULE(vconn_unix)
-VLOG_MODULE(vconn)
-VLOG_MODULE(vsctl)
VLOG_MODULE(vlog)
-VLOG_MODULE(wcelim)
+VLOG_MODULE(vsctl)
VLOG_MODULE(vswitchd)
VLOG_MODULE(vt)
+VLOG_MODULE(wcelim)
+VLOG_MODULE(wdp)
+VLOG_MODULE(wdp_xflow)
VLOG_MODULE(xenserver)
+ VLOG_MODULE(xenserverd)
+VLOG_MODULE(xfif)
+VLOG_MODULE(xfif_linux)
+VLOG_MODULE(xfif_netdev)
#undef VLOG_MODULE
#include <sys/ioctl.h>
#include <unistd.h>
-#include "dpif-provider.h"
+ #include "netdev.h"
#include "ofpbuf.h"
#include "poll-loop.h"
#include "rtnetlink.h"
+ #include "shash.h"
#include "svec.h"
#include "util.h"
+#include "xfif-provider.h"
#include "vlog.h"
-#define THIS_MODULE VLM_dpif_linux
+#define THIS_MODULE VLM_xfif_linux
/* Datapath interface for the openvswitch Linux kernel module. */
-struct dpif_linux {
- struct dpif dpif;
+struct xfif_linux {
+ struct xfif xfif;
int fd;
- /* Used by dpif_linux_get_all_names(). */
+ /* Used by xfif_linux_get_all_names(). */
char *local_ifname;
int minor;
}
static void
-dpif_linux_close(struct dpif *dpif_)
+xfif_linux_close(struct xfif *xfif_)
{
- struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- rtnetlink_notifier_unregister(&dpif->port_notifier);
- shash_destroy(&dpif->changed_ports);
- free(dpif->local_ifname);
- close(dpif->fd);
- free(dpif);
+ struct xfif_linux *xfif = xfif_linux_cast(xfif_);
+ rtnetlink_notifier_unregister(&xfif->port_notifier);
- svec_destroy(&xfif->changed_ports);
++ shash_destroy(&xfif->changed_ports);
+ free(xfif->local_ifname);
+ close(xfif->fd);
+ free(xfif);
}
static int
}
static int
-dpif_linux_destroy(struct dpif *dpif_)
+xfif_linux_destroy(struct xfif *xfif_)
{
- struct odp_port *ports;
++ struct xflow_port *ports;
+ size_t n_ports;
+ int err;
+ int i;
+
- err = dpif_port_list(dpif_, &ports, &n_ports);
++ err = xfif_port_list(xfif_, &ports, &n_ports);
+ if (err) {
+ return err;
+ }
+
+ for (i = 0; i < n_ports; i++) {
- if (ports[i].port != ODPP_LOCAL) {
- err = do_ioctl(dpif_, ODP_VPORT_DEL, ports[i].devname);
++ if (ports[i].port != XFLOWP_LOCAL) {
++ err = do_ioctl(xfif_, XFLOW_VPORT_DEL, ports[i].devname);
+ if (err) {
+ VLOG_WARN_RL(&error_rl, "%s: error deleting port %s (%s)",
- dpif_name(dpif_), ports[i].devname, strerror(err));
++ xfif_name(xfif_), ports[i].devname, strerror(err));
+ }
+ }
+ }
+
+ free(ports);
+
- return do_ioctl(dpif_, ODP_DP_DESTROY, NULL);
+ return do_ioctl(xfif_, XFLOW_DP_DESTROY, NULL);
}
static int
memset(&port, 0, sizeof port);
strncpy(port.devname, devname, sizeof port.devname);
port.flags = flags;
- error = do_ioctl(xfif_, XFLOW_PORT_ADD, &port);
- error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port);
++ error = do_ioctl(xfif_, XFLOW_PORT_ATTACH, &port);
if (!error) {
*port_no = port.port;
}
}
static int
-dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no)
+xfif_linux_port_del(struct xfif *xfif_, uint16_t port_no)
{
int tmp = port_no;
- return do_ioctl(xfif_, XFLOW_PORT_DEL, &tmp);
+ int err;
- struct odp_port port;
++ struct xflow_port port;
+
- err = dpif_port_query_by_number(dpif_, port_no, &port);
++ err = xfif_port_query_by_number(xfif_, port_no, &port);
+ if (err) {
+ return err;
+ }
+
- err = do_ioctl(dpif_, ODP_PORT_DETACH, &tmp);
++ err = do_ioctl(xfif_, XFLOW_PORT_DETACH, &tmp);
+ if (err) {
+ return err;
+ }
+
+ if (!netdev_is_open(port.devname)) {
+ /* Try deleting the port if no one has it open. This shouldn't
+ * actually be necessary unless the config changed while we weren't
+ * running, but it won't hurt anything if the port is already gone. */
- do_ioctl(dpif_, ODP_VPORT_DEL, port.devname);
++ do_ioctl(xfif_, XFLOW_VPORT_DEL, port.devname);
+ }
+
+ return 0;
}
static int
}
static int
-dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
+xfif_linux_port_poll(const struct xfif *xfif_, char **devnamep)
{
- struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+ struct xfif_linux *xfif = xfif_linux_cast(xfif_);
- if (dpif->change_error) {
- dpif->change_error = false;
- shash_clear(&dpif->changed_ports);
+ if (xfif->change_error) {
+ xfif->change_error = false;
- svec_clear(&xfif->changed_ports);
++ shash_clear(&xfif->changed_ports);
return ENOBUFS;
- } else if (xfif->changed_ports.n) {
- *devnamep = xfif->changed_ports.names[--xfif->changed_ports.n];
- } else if (!shash_is_empty(&dpif->changed_ports)) {
- struct shash_node *node = shash_first(&dpif->changed_ports);
++ } else if (!shash_is_empty(&xfif->changed_ports)) {
++ struct shash_node *node = shash_first(&xfif->changed_ports);
+ *devnamep = xstrdup(node->name);
- shash_delete(&dpif->changed_ports, node);
++ shash_delete(&xfif->changed_ports, node);
return 0;
} else {
return EAGAIN;
}
static void
-dpif_linux_port_poll_wait(const struct dpif *dpif_)
+xfif_linux_port_poll_wait(const struct xfif *xfif_)
{
- struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) {
+ struct xfif_linux *xfif = xfif_linux_cast(xfif_);
- if (xfif->changed_ports.n || xfif->change_error) {
++ if (!shash_is_empty(&xfif->changed_ports) || xfif->change_error) {
poll_immediate_wake();
} else {
rtnetlink_notifier_wait();
int retval;
int error;
- buf = ofpbuf_new(65536);
- buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING);
- ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING);
- retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
++ buf = ofpbuf_new(65536 + XFIF_RECV_MSG_PADDING);
++ ofpbuf_reserve(buf, XFIF_RECV_MSG_PADDING);
+ retval = read(xfif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
if (retval < 0) {
error = errno;
if (error != EAGAIN) {
char *name;
name = xasprintf("dp%d", minor);
- dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor);
+ xfif_init(&xfif->xfif, &xfif_linux_class, name, minor, minor);
free(name);
- dpif->fd = fd;
- dpif->local_ifname = NULL;
- dpif->minor = minor;
- dpif->local_ifindex = 0;
- shash_init(&dpif->changed_ports);
- dpif->change_error = false;
- *dpifp = &dpif->dpif;
+ xfif->fd = fd;
+ xfif->local_ifname = NULL;
+ xfif->minor = minor;
+ xfif->local_ifindex = 0;
- svec_init(&xfif->changed_ports);
++ shash_init(&xfif->changed_ports);
+ xfif->change_error = false;
+ *xfifp = &xfif->xfif;
} else {
- free(dpif);
+ free(xfif);
}
} else {
error = errno;
{
/* Our datapath changed, either adding a new port or deleting an
* existing one. */
- if (!svec_contains(&xfif->changed_ports, change->ifname)) {
- svec_add(&xfif->changed_ports, change->ifname);
- svec_sort(&xfif->changed_ports);
- }
- shash_add_once(&dpif->changed_ports, change->ifname, NULL);
++ shash_add_once(&xfif->changed_ports, change->ifname, NULL);
}
} else {
- dpif->change_error = true;
+ xfif->change_error = true;
}
}
--- /dev/null
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "xfif.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "csum.h"
+#include "flow.h"
+#include "hmap.h"
+#include "list.h"
+#include "netdev.h"
+#include "xflow-util.h"
+#include "ofp-print.h"
+#include "ofpbuf.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "queue.h"
+#include "timeval.h"
+#include "util.h"
+#include "xfif-provider.h"
+
+#include "vlog.h"
+#define THIS_MODULE VLM_xfif_netdev
+
+/* Configuration parameters. */
+enum { N_QUEUES = 2 }; /* Number of queues for xfif_recv(). */
+enum { MAX_QUEUE_LEN = 100 }; /* Maximum number of packets per queue. */
+enum { N_GROUPS = 16 }; /* Number of port groups. */
+enum { MAX_PORTS = 256 }; /* Maximum number of ports. */
+enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
+
+/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
+ * headers to be aligned on a 4-byte boundary. */
+enum { XF_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
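The extra 2 bytes matter because an Ethernet header is 14 bytes: starting the packet data at offset 2 (as the receive loop below does) places the IP header at offset 2 + 14 = 16, a 4-byte boundary, while still leaving VLAN_HEADER_LEN bytes of headroom for a pushed 802.1Q tag.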
+
+/* Datapath based on the network device interface from netdev.h. */
+struct xf_netdev {
+ struct list node;
+ int xf_idx;
+ int open_cnt;
+ bool destroyed;
+
+ bool drop_frags; /* Drop all IP fragments, if true. */
+ struct ovs_queue queues[N_QUEUES]; /* Messages queued for xfif_recv(). */
+ struct hmap flow_table; /* Flow table. */
+ struct xflow_port_group groups[N_GROUPS];
+
+ /* Statistics. */
+ long long int n_frags; /* Number of dropped IP fragments. */
+ long long int n_hit; /* Number of flow table matches. */
+ long long int n_missed; /* Number of flow table misses. */
+ long long int n_lost; /* Number of misses not passed to client. */
+
+ /* Ports. */
+ int n_ports;
+ struct xf_netdev_port *ports[MAX_PORTS];
+ struct list port_list;
+ unsigned int serial;
+};
+
+/* A port in a netdev-based datapath. */
+struct xf_netdev_port {
+ int port_no; /* Index into xf_netdev's 'ports'. */
+ struct list node; /* Element in xf_netdev's 'port_list'. */
+ struct netdev *netdev;
+ bool internal; /* Internal port (as XFLOW_PORT_INTERNAL)? */
+};
+
+/* A flow in xf_netdev's 'flow_table'. */
+struct xf_netdev_flow {
+ struct hmap_node node; /* Element in xf_netdev's 'flow_table'. */
+ struct xflow_key key;
+
+ /* Statistics. */
+ struct timeval used; /* Last used time, in milliseconds. */
+ long long int packet_count; /* Number of packets matched. */
+ long long int byte_count; /* Number of bytes matched. */
+ uint8_t ip_tos; /* IP TOS value. */
+ uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */
+
+ /* Actions. */
+ union xflow_action *actions;
+ unsigned int n_actions;
+};
+
+/* Interface to netdev-based datapath. */
+struct xfif_netdev {
+ struct xfif xfif;
+ struct xf_netdev *xf;
+ int listen_mask;
+ unsigned int xf_serial;
+};
+
+/* All netdev-based datapaths. */
+static struct xf_netdev *xf_netdevs[256];
+struct list xf_netdev_list = LIST_INITIALIZER(&xf_netdev_list);
+enum { N_XF_NETDEVS = ARRAY_SIZE(xf_netdevs) };
+
+/* Maximum port MTU seen so far. */
+static int max_mtu = ETH_PAYLOAD_MAX;
+
+static int get_port_by_number(struct xf_netdev *, uint16_t port_no,
+ struct xf_netdev_port **portp);
+static int get_port_by_name(struct xf_netdev *, const char *devname,
+ struct xf_netdev_port **portp);
+static void xf_netdev_free(struct xf_netdev *);
+static void xf_netdev_flow_flush(struct xf_netdev *);
+static int do_add_port(struct xf_netdev *, const char *devname, uint16_t flags,
+ uint16_t port_no);
+static int do_del_port(struct xf_netdev *, uint16_t port_no);
+static int xf_netdev_output_control(struct xf_netdev *, const struct ofpbuf *,
+ int queue_no, int port_no, uint32_t arg);
+static int xf_netdev_execute_actions(struct xf_netdev *,
+ struct ofpbuf *, struct xflow_key *,
+ const union xflow_action *, int n);
+
+static struct xfif_netdev *
+xfif_netdev_cast(const struct xfif *xfif)
+{
+ xfif_assert_class(xfif, &xfif_netdev_class);
+ return CONTAINER_OF(xfif, struct xfif_netdev, xfif);
+}
+
+static struct xf_netdev *
+get_xf_netdev(const struct xfif *xfif)
+{
+ return xfif_netdev_cast(xfif)->xf;
+}
+
+static int
+name_to_xf_idx(const char *name)
+{
+ if (!strncmp(name, "xf", 2) && isdigit((unsigned char)name[2])) {
+ int xf_idx = atoi(name + 2);
+ if (xf_idx >= 0 && xf_idx < N_XF_NETDEVS) {
+ return xf_idx;
+ }
+ }
+ return -1;
+}
+
+static struct xf_netdev *
+find_xf_netdev(const char *name)
+{
+ int xf_idx;
+ size_t i;
+
+ xf_idx = name_to_xf_idx(name);
+ if (xf_idx >= 0) {
+ return xf_netdevs[xf_idx];
+ }
+
+ for (i = 0; i < N_XF_NETDEVS; i++) {
+ struct xf_netdev *xf = xf_netdevs[i];
+ if (xf) {
+ struct xf_netdev_port *port;
+ if (!get_port_by_name(xf, name, &port)) {
+ return xf;
+ }
+ }
+ }
+ return NULL;
+}
+
+static struct xfif *
+create_xfif_netdev(struct xf_netdev *xf)
+{
+ struct xfif_netdev *xfif;
+ char *xfname;
+
+ xf->open_cnt++;
+
+ xfname = xasprintf("xf%d", xf->xf_idx);
+ xfif = xmalloc(sizeof *xfif);
+ xfif_init(&xfif->xfif, &xfif_netdev_class, xfname, xf->xf_idx, xf->xf_idx);
+ xfif->xf = xf;
+ xfif->listen_mask = 0;
+ xfif->xf_serial = xf->serial;
+ free(xfname);
+
+ return &xfif->xfif;
+}
+
+static int
+create_xf_netdev(const char *name, int xf_idx, struct xfif **xfifp)
+{
+ struct xf_netdev *xf;
+ int error;
+ int i;
+
+ if (xf_netdevs[xf_idx]) {
+ return EBUSY;
+ }
+
+ /* Create datapath. */
+ xf_netdevs[xf_idx] = xf = xzalloc(sizeof *xf);
+ list_push_back(&xf_netdev_list, &xf->node);
+ xf->xf_idx = xf_idx;
+ xf->open_cnt = 0;
+ xf->drop_frags = false;
+ for (i = 0; i < N_QUEUES; i++) {
+ queue_init(&xf->queues[i]);
+ }
+ hmap_init(&xf->flow_table);
+ for (i = 0; i < N_GROUPS; i++) {
+ xf->groups[i].ports = NULL;
+ xf->groups[i].n_ports = 0;
+ xf->groups[i].group = i;
+ }
+ list_init(&xf->port_list);
+ error = do_add_port(xf, name, XFLOW_PORT_INTERNAL, XFLOWP_LOCAL);
+ if (error) {
+ xf_netdev_free(xf);
+ return ENODEV;
+ }
+
+ *xfifp = create_xfif_netdev(xf);
+ return 0;
+}
+
+static int
+xfif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create,
+ struct xfif **xfifp)
+{
+ if (create) {
+ if (find_xf_netdev(name)) {
+ return EEXIST;
+ } else {
+ int xf_idx = name_to_xf_idx(name);
+ if (xf_idx >= 0) {
+ return create_xf_netdev(name, xf_idx, xfifp);
+ } else {
+ /* Scan for unused xf_idx number. */
+ for (xf_idx = 0; xf_idx < N_XF_NETDEVS; xf_idx++) {
+ int error = create_xf_netdev(name, xf_idx, xfifp);
+ if (error != EBUSY) {
+ return error;
+ }
+ }
+
+ /* All datapath numbers in use. */
+ return ENOBUFS;
+ }
+ }
+ } else {
+ struct xf_netdev *xf = find_xf_netdev(name);
+ if (xf) {
+ *xfifp = create_xfif_netdev(xf);
+ return 0;
+ } else {
+ return ENODEV;
+ }
+ }
+}
+
+static void
+xf_netdev_free(struct xf_netdev *xf)
+{
+ int i;
+
+ xf_netdev_flow_flush(xf);
+ while (xf->n_ports > 0) {
+ struct xf_netdev_port *port = CONTAINER_OF(
+ xf->port_list.next, struct xf_netdev_port, node);
+ do_del_port(xf, port->port_no);
+ }
+ for (i = 0; i < N_QUEUES; i++) {
+ queue_destroy(&xf->queues[i]);
+ }
+ hmap_destroy(&xf->flow_table);
+ for (i = 0; i < N_GROUPS; i++) {
+ free(xf->groups[i].ports);
+ }
+ xf_netdevs[xf->xf_idx] = NULL;
+ list_remove(&xf->node);
+ free(xf);
+}
+
+static void
+xfif_netdev_close(struct xfif *xfif)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ assert(xf->open_cnt > 0);
+ if (--xf->open_cnt == 0 && xf->destroyed) {
+ xf_netdev_free(xf);
+ }
+ free(xfif);
+}
+
+static int
+xfif_netdev_destroy(struct xfif *xfif)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ xf->destroyed = true;
+ return 0;
+}
+
+static int
+xfif_netdev_get_stats(const struct xfif *xfif, struct xflow_stats *stats)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ memset(stats, 0, sizeof *stats);
+ stats->n_flows = hmap_count(&xf->flow_table);
+ stats->cur_capacity = hmap_capacity(&xf->flow_table);
+ stats->max_capacity = MAX_FLOWS;
+ stats->n_ports = xf->n_ports;
+ stats->max_ports = MAX_PORTS;
+ stats->max_groups = N_GROUPS;
+ stats->n_frags = xf->n_frags;
+ stats->n_hit = xf->n_hit;
+ stats->n_missed = xf->n_missed;
+ stats->n_lost = xf->n_lost;
+ stats->max_miss_queue = MAX_QUEUE_LEN;
+ stats->max_action_queue = MAX_QUEUE_LEN;
+ return 0;
+}
+
+static int
+xfif_netdev_get_drop_frags(const struct xfif *xfif, bool *drop_fragsp)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ *drop_fragsp = xf->drop_frags;
+ return 0;
+}
+
+static int
+xfif_netdev_set_drop_frags(struct xfif *xfif, bool drop_frags)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ xf->drop_frags = drop_frags;
+ return 0;
+}
+
+static int
+do_add_port(struct xf_netdev *xf, const char *devname, uint16_t flags,
+ uint16_t port_no)
+{
+ bool internal = (flags & XFLOW_PORT_INTERNAL) != 0;
+ struct xf_netdev_port *port;
+ struct netdev_options netdev_options;
+ struct netdev *netdev;
+ int mtu;
+ int error;
+
+ /* XXX reject devices already in some xf_netdev. */
+
+ /* Open and validate network device. */
+ memset(&netdev_options, 0, sizeof netdev_options);
+ netdev_options.name = devname;
+ netdev_options.ethertype = NETDEV_ETH_TYPE_ANY;
+ netdev_options.may_create = true;
+ if (internal) {
+ netdev_options.type = "tap";
+ } else {
+ netdev_options.may_open = true;
+ }
+
+ error = netdev_open(&netdev_options, &netdev);
+ if (error) {
+ return error;
+ }
+ /* XXX reject loopback devices */
+ /* XXX reject non-Ethernet devices */
+
+ error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
+ if (error) {
+ netdev_close(netdev);
+ return error;
+ }
+
+ port = xmalloc(sizeof *port);
+ port->port_no = port_no;
+ port->netdev = netdev;
+ port->internal = internal;
+
+ netdev_get_mtu(netdev, &mtu);
+ if (mtu > max_mtu) {
+ max_mtu = mtu;
+ }
+
+ list_push_back(&xf->port_list, &port->node);
+ xf->ports[port_no] = port;
+ xf->n_ports++;
+ xf->serial++;
+
+ return 0;
+}
+
+static int
+xfif_netdev_port_add(struct xfif *xfif, const char *devname, uint16_t flags,
+ uint16_t *port_nop)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ int port_no;
+
+ for (port_no = 0; port_no < MAX_PORTS; port_no++) {
+ if (!xf->ports[port_no]) {
+ *port_nop = port_no;
+ return do_add_port(xf, devname, flags, port_no);
+ }
+ }
+ return EFBIG;
+}
+
+static int
+xfif_netdev_port_del(struct xfif *xfif, uint16_t port_no)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ return port_no == XFLOWP_LOCAL ? EINVAL : do_del_port(xf, port_no);
+}
+
+static bool
+is_valid_port_number(uint16_t port_no)
+{
+ return port_no < MAX_PORTS;
+}
+
+static int
+get_port_by_number(struct xf_netdev *xf,
+ uint16_t port_no, struct xf_netdev_port **portp)
+{
+ if (!is_valid_port_number(port_no)) {
+ *portp = NULL;
+ return EINVAL;
+ } else {
+ *portp = xf->ports[port_no];
+ return *portp ? 0 : ENOENT;
+ }
+}
+
+static int
+get_port_by_name(struct xf_netdev *xf,
+ const char *devname, struct xf_netdev_port **portp)
+{
+ struct xf_netdev_port *port;
+
+ LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ if (!strcmp(netdev_get_name(port->netdev), devname)) {
+ *portp = port;
+ return 0;
+ }
+ }
+ return ENOENT;
+}
+
+static int
+do_del_port(struct xf_netdev *xf, uint16_t port_no)
+{
+ struct xf_netdev_port *port;
+ char *name;
+ int error;
+
+ error = get_port_by_number(xf, port_no, &port);
+ if (error) {
+ return error;
+ }
+
+ list_remove(&port->node);
+ xf->ports[port->port_no] = NULL;
+ xf->n_ports--;
+ xf->serial++;
+
+ name = xstrdup(netdev_get_name(port->netdev));
+ netdev_close(port->netdev);
+
+ free(name);
+ free(port);
+
+ return 0;
+}
+
+static void
+answer_port_query(const struct xf_netdev_port *port, struct xflow_port *xflow_port)
+{
+ memset(xflow_port, 0, sizeof *xflow_port);
+ ovs_strlcpy(xflow_port->devname, netdev_get_name(port->netdev),
+ sizeof xflow_port->devname);
+ xflow_port->port = port->port_no;
+ xflow_port->flags = port->internal ? XFLOW_PORT_INTERNAL : 0;
+}
+
+static int
+xfif_netdev_port_query_by_number(const struct xfif *xfif, uint16_t port_no,
+ struct xflow_port *xflow_port)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_port *port;
+ int error;
+
+ error = get_port_by_number(xf, port_no, &port);
+ if (!error) {
+ answer_port_query(port, xflow_port);
+ }
+ return error;
+}
+
+static int
+xfif_netdev_port_query_by_name(const struct xfif *xfif, const char *devname,
+ struct xflow_port *xflow_port)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_port *port;
+ int error;
+
+ error = get_port_by_name(xf, devname, &port);
+ if (!error) {
+ answer_port_query(port, xflow_port);
+ }
+ return error;
+}
+
+static void
+xf_netdev_free_flow(struct xf_netdev *xf, struct xf_netdev_flow *flow)
+{
+ hmap_remove(&xf->flow_table, &flow->node);
+ free(flow->actions);
+ free(flow);
+}
+
+static void
+xf_netdev_flow_flush(struct xf_netdev *xf)
+{
+ struct xf_netdev_flow *flow, *next;
+
+ HMAP_FOR_EACH_SAFE (flow, next, struct xf_netdev_flow, node,
+ &xf->flow_table) {
+ xf_netdev_free_flow(xf, flow);
+ }
+}
+
+static int
+xfif_netdev_flow_flush(struct xfif *xfif)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ xf_netdev_flow_flush(xf);
+ return 0;
+}
+
+static int
+xfif_netdev_port_list(const struct xfif *xfif, struct xflow_port *ports, int n)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_port *port;
+ int i;
+
+ i = 0;
+ LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ struct xflow_port *xflow_port = &ports[i];
+ if (i >= n) {
+ break;
+ }
+ answer_port_query(port, xflow_port);
+ i++;
+ }
+ return xf->n_ports;
+}
+
+static int
+xfif_netdev_port_poll(const struct xfif *xfif_, char **devnamep OVS_UNUSED)
+{
+ struct xfif_netdev *xfif = xfif_netdev_cast(xfif_);
+ if (xfif->xf_serial != xfif->xf->serial) {
+ xfif->xf_serial = xfif->xf->serial;
+ return ENOBUFS;
+ } else {
+ return EAGAIN;
+ }
+}
+
+static void
+xfif_netdev_port_poll_wait(const struct xfif *xfif_)
+{
+ struct xfif_netdev *xfif = xfif_netdev_cast(xfif_);
+ if (xfif->xf_serial != xfif->xf->serial) {
+ poll_immediate_wake();
+ }
+}
+
+static int
+get_port_group(const struct xfif *xfif, int group_no,
+ struct xflow_port_group **groupp)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+
+ if (group_no >= 0 && group_no < N_GROUPS) {
+ *groupp = &xf->groups[group_no];
+ return 0;
+ } else {
+ *groupp = NULL;
+ return EINVAL;
+ }
+}
+
+static int
+xfif_netdev_port_group_get(const struct xfif *xfif, int group_no,
+ uint16_t ports[], int n)
+{
+ struct xflow_port_group *group;
+ int error;
+
+ if (n < 0) {
+ return -EINVAL;
+ }
+
+ error = get_port_group(xfif, group_no, &group);
+ if (!error) {
+ memcpy(ports, group->ports, MIN(n, group->n_ports) * sizeof *ports);
+ return group->n_ports;
+ } else {
+ return -error;
+ }
+}
+
+static int
+xfif_netdev_port_group_set(struct xfif *xfif, int group_no,
+ const uint16_t ports[], int n)
+{
+ struct xflow_port_group *group;
+ int error;
+
+ if (n < 0 || n > MAX_PORTS) {
+ return EINVAL;
+ }
+
+ error = get_port_group(xfif, group_no, &group);
+ if (!error) {
+ free(group->ports);
+ group->ports = xmemdup(ports, n * sizeof *group->ports);
+ group->n_ports = n;
+ group->group = group_no;
+ }
+ return error;
+}
+
+static struct xf_netdev_flow *
+xf_netdev_lookup_flow(const struct xf_netdev *xf,
+ const struct xflow_key *key)
+{
+ struct xf_netdev_flow *flow;
+
+ HMAP_FOR_EACH_WITH_HASH (flow, struct xf_netdev_flow, node,
+ xflow_key_hash(key, 0), &xf->flow_table) {
+ if (xflow_key_equal(&flow->key, key)) {
+ return flow;
+ }
+ }
+ return NULL;
+}
+
+static void
+answer_flow_query(struct xf_netdev_flow *flow, uint32_t query_flags,
+ struct xflow_flow *xflow_flow)
+{
+ if (flow) {
+ xflow_flow->key = flow->key;
+ xflow_flow->stats.n_packets = flow->packet_count;
+ xflow_flow->stats.n_bytes = flow->byte_count;
+ xflow_flow->stats.used_sec = flow->used.tv_sec;
+ xflow_flow->stats.used_nsec = flow->used.tv_usec * 1000;
+ xflow_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl);
+ xflow_flow->stats.ip_tos = flow->ip_tos;
+ xflow_flow->stats.error = 0;
+ if (xflow_flow->n_actions > 0) {
+ unsigned int n = MIN(xflow_flow->n_actions, flow->n_actions);
+ memcpy(xflow_flow->actions, flow->actions,
+ n * sizeof *xflow_flow->actions);
+ xflow_flow->n_actions = flow->n_actions;
+ }
+
+ if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) {
+ flow->tcp_ctl = 0;
+ }
+
+ } else {
+ xflow_flow->stats.error = ENOENT;
+ }
+}
+
+static int
+xfif_netdev_flow_get(const struct xfif *xfif, struct xflow_flow flows[], int n)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ int i;
+
+ for (i = 0; i < n; i++) {
+ struct xflow_flow *xflow_flow = &flows[i];
+ answer_flow_query(xf_netdev_lookup_flow(xf, &xflow_flow->key),
+ xflow_flow->flags, xflow_flow);
+ }
+ return 0;
+}
+
+static int
+xfif_netdev_validate_actions(const union xflow_action *actions, int n_actions,
+ bool *mutates)
+{
+ unsigned int i;
+
+ *mutates = false;
+ for (i = 0; i < n_actions; i++) {
+ const union xflow_action *a = &actions[i];
+ switch (a->type) {
+ case XFLOWAT_OUTPUT:
+ if (a->output.port >= MAX_PORTS) {
+ return EINVAL;
+ }
+ break;
+
+ case XFLOWAT_OUTPUT_GROUP:
+ *mutates = true;
+ if (a->output_group.group >= N_GROUPS) {
+ return EINVAL;
+ }
+ break;
+
+ case XFLOWAT_CONTROLLER:
+ break;
+
+ case XFLOWAT_SET_DL_TCI:
+ *mutates = true;
+ if (a->dl_tci.mask != htons(VLAN_VID_MASK)
+ && a->dl_tci.mask != htons(VLAN_PCP_MASK)
+ && a->dl_tci.mask != htons(VLAN_VID_MASK | VLAN_PCP_MASK)) {
+ return EINVAL;
+ }
+ if (a->dl_tci.tci & ~a->dl_tci.mask){
+ return EINVAL;
+ }
+ break;
+
+ case XFLOWAT_SET_NW_TOS:
+ *mutates = true;
+ if (a->nw_tos.nw_tos & IP_ECN_MASK) {
+ return EINVAL;
+ }
+ break;
+
+ case XFLOWAT_STRIP_VLAN:
+ case XFLOWAT_SET_DL_SRC:
+ case XFLOWAT_SET_DL_DST:
+ case XFLOWAT_SET_NW_SRC:
+ case XFLOWAT_SET_NW_DST:
+ case XFLOWAT_SET_TP_SRC:
+ case XFLOWAT_SET_TP_DST:
+ *mutates = true;
+ break;
+
+ default:
+ return EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
+static int
+set_flow_actions(struct xf_netdev_flow *flow, struct xflow_flow *xflow_flow)
+{
+ size_t n_bytes;
+ bool mutates;
+ int error;
+
+ if (xflow_flow->n_actions >= 4096 / sizeof *xflow_flow->actions) {
+ return EINVAL;
+ }
+ error = xfif_netdev_validate_actions(xflow_flow->actions,
+ xflow_flow->n_actions, &mutates);
+ if (error) {
+ return error;
+ }
+
+ n_bytes = xflow_flow->n_actions * sizeof *flow->actions;
+ flow->actions = xrealloc(flow->actions, n_bytes);
+ flow->n_actions = xflow_flow->n_actions;
+ memcpy(flow->actions, xflow_flow->actions, n_bytes);
+ return 0;
+}
+
+static int
+add_flow(struct xfif *xfif, struct xflow_flow *xflow_flow)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_flow *flow;
+ int error;
+
+ flow = xzalloc(sizeof *flow);
+ flow->key = xflow_flow->key;
+
+ error = set_flow_actions(flow, xflow_flow);
+ if (error) {
+ free(flow);
+ return error;
+ }
+
+ hmap_insert(&xf->flow_table, &flow->node,
+ xflow_key_hash(&flow->key, 0));
+ return 0;
+}
+
+static void
+clear_stats(struct xf_netdev_flow *flow)
+{
+ flow->used.tv_sec = 0;
+ flow->used.tv_usec = 0;
+ flow->packet_count = 0;
+ flow->byte_count = 0;
+ flow->ip_tos = 0;
+ flow->tcp_ctl = 0;
+}
+
+static int
+xfif_netdev_flow_put(struct xfif *xfif, struct xflow_flow_put *put)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_flow *flow;
+
+ flow = xf_netdev_lookup_flow(xf, &put->flow.key);
+ if (!flow) {
+ if (put->flags & XFLOWPF_CREATE) {
+ if (hmap_count(&xf->flow_table) < MAX_FLOWS) {
+ return add_flow(xfif, &put->flow);
+ } else {
+ return EFBIG;
+ }
+ } else {
+ return ENOENT;
+ }
+ } else {
+ if (put->flags & XFLOWPF_MODIFY) {
+ int error = set_flow_actions(flow, &put->flow);
+ if (!error && put->flags & XFLOWPF_ZERO_STATS) {
+ clear_stats(flow);
+ }
+ return error;
+ } else {
+ return EEXIST;
+ }
+ }
+}
+
+
+static int
+xfif_netdev_flow_del(struct xfif *xfif, struct xflow_flow *xflow_flow)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_flow *flow;
+
+ flow = xf_netdev_lookup_flow(xf, &xflow_flow->key);
+ if (flow) {
+ answer_flow_query(flow, 0, xflow_flow);
+ xf_netdev_free_flow(xf, flow);
+ return 0;
+ } else {
+ return ENOENT;
+ }
+}
+
+static int
+xfif_netdev_flow_list(const struct xfif *xfif, struct xflow_flow flows[], int n)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct xf_netdev_flow *flow;
+ int i;
+
+ i = 0;
+ HMAP_FOR_EACH (flow, struct xf_netdev_flow, node, &xf->flow_table) {
+ if (i >= n) {
+ break;
+ }
+ answer_flow_query(flow, 0, &flows[i++]);
+ }
+ return hmap_count(&xf->flow_table);
+}
+
+static int
+xfif_netdev_execute(struct xfif *xfif, uint16_t in_port,
+ const union xflow_action actions[], int n_actions,
+ const struct ofpbuf *packet)
+{
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ struct ofpbuf copy;
+ bool mutates;
+ struct xflow_key key;
+ flow_t flow;
+ int error;
+
+ if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) {
+ return EINVAL;
+ }
+
+ error = xfif_netdev_validate_actions(actions, n_actions, &mutates);
+ if (error) {
+ return error;
+ }
+
+ if (mutates) {
+ /* We need a deep copy of 'packet' since we're going to modify its
+ * data. */
+ ofpbuf_init(&copy, XF_NETDEV_HEADROOM + packet->size);
+ copy.data = (char*)copy.base + XF_NETDEV_HEADROOM;
+ ofpbuf_put(&copy, packet->data, packet->size);
+ } else {
+ /* We still need a shallow copy of 'packet', even though we won't
+ * modify its data, because flow_extract() modifies packet->l2, etc.
+ * We could probably get away with modifying those but it's more polite
+ * if we don't. */
+ copy = *packet;
+ }
- flow_extract(&copy, in_port, &flow);
++ flow_extract(&copy, 0, in_port, &flow);
+ xflow_key_from_flow(&key, &flow);
+ error = xf_netdev_execute_actions(xf, &copy, &key, actions, n_actions);
+ if (mutates) {
+ ofpbuf_uninit(&copy);
+ }
+ return error;
+}
+
+static int
+xfif_netdev_recv_get_mask(const struct xfif *xfif, int *listen_mask)
+{
+ struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+ *listen_mask = xfif_netdev->listen_mask;
+ return 0;
+}
+
+static int
+xfif_netdev_recv_set_mask(struct xfif *xfif, int listen_mask)
+{
+ struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+ if (!(listen_mask & ~XFLOWL_ALL)) {
+ xfif_netdev->listen_mask = listen_mask;
+ return 0;
+ } else {
+ return EINVAL;
+ }
+}
+
+static struct ovs_queue *
+find_nonempty_queue(struct xfif *xfif)
+{
+ struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+ struct xf_netdev *xf = get_xf_netdev(xfif);
+ int mask = xfif_netdev->listen_mask;
+ int i;
+
+ for (i = 0; i < N_QUEUES; i++) {
+ struct ovs_queue *q = &xf->queues[i];
+ if (q->n && mask & (1u << i)) {
+ return q;
+ }
+ }
+ return NULL;
+}
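
/* The listen mask is a bitmap with one bit per queue index, so the test above
 * selects exactly the queues enabled through recv_set_mask().  A client
 * interested only in flow-table misses might do (sketch): */
static int
listen_for_misses_sketch(struct xfif *xfif)
{
    return xfif_netdev_recv_set_mask(xfif, 1u << _XFLOWL_MISS_NR);
}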
+
+static int
+xfif_netdev_recv(struct xfif *xfif, struct ofpbuf **bufp)
+{
+ struct ovs_queue *q = find_nonempty_queue(xfif);
+ if (q) {
+ *bufp = queue_pop_head(q);
+ return 0;
+ } else {
+ return EAGAIN;
+ }
+}
+
+static void
+xfif_netdev_recv_wait(struct xfif *xfif)
+{
+ struct ovs_queue *q = find_nonempty_queue(xfif);
+ if (q) {
+ poll_immediate_wake();
+ } else {
+ /* No messages ready to be received, and xf_wait() will ensure that we
+ * wake up to queue new messages, so there is nothing to do. */
+ }
+}
+\f
+static void
+xf_netdev_flow_used(struct xf_netdev_flow *flow,
+ const struct xflow_key *key,
+ const struct ofpbuf *packet)
+{
+ time_timeval(&flow->used);
+ flow->packet_count++;
+ flow->byte_count += packet->size;
+ if (key->dl_type == htons(ETH_TYPE_IP)) {
+ struct ip_header *nh = packet->l3;
+ flow->ip_tos = nh->ip_tos;
+
+ if (key->nw_proto == IPPROTO_TCP) {
+ struct tcp_header *th = packet->l4;
+ flow->tcp_ctl |= th->tcp_ctl;
+ }
+ }
+}
+
+static void
+xf_netdev_port_input(struct xf_netdev *xf, struct xf_netdev_port *port,
+ struct ofpbuf *packet)
+{
+ struct xf_netdev_flow *flow;
+ struct xflow_key key;
+ flow_t f;
+
- if (flow_extract(packet, port->port_no, &f) && xf->drop_frags) {
++ if (flow_extract(packet, 0, port->port_no, &f) && xf->drop_frags) {
+ xf->n_frags++;
+ return;
+ }
+ xflow_key_from_flow(&key, &f);
+
+ flow = xf_netdev_lookup_flow(xf, &key);
+ if (flow) {
+ xf_netdev_flow_used(flow, &key, packet);
+ xf_netdev_execute_actions(xf, packet, &key,
+ flow->actions, flow->n_actions);
+ xf->n_hit++;
+ } else {
+ xf->n_missed++;
+ xf_netdev_output_control(xf, packet, _XFLOWL_MISS_NR, port->port_no, 0);
+ }
+}
+
+static void
+xf_netdev_run(void)
+{
+ struct ofpbuf packet;
+ struct xf_netdev *xf;
+
+ ofpbuf_init(&packet, XF_NETDEV_HEADROOM + max_mtu);
+ LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+ struct xf_netdev_port *port;
+
+ LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ int error;
+
+ /* Reset packet contents. */
+ packet.data = (char*)packet.base + XF_NETDEV_HEADROOM;
+ packet.size = 0;
+
+ error = netdev_recv(port->netdev, &packet);
+ if (!error) {
+ xf_netdev_port_input(xf, port, &packet);
+ } else if (error != EAGAIN) {
+ struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+ VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
+ netdev_get_name(port->netdev), strerror(error));
+ }
+ }
+ }
+ ofpbuf_uninit(&packet);
+}
+
+static void
+xf_netdev_wait(void)
+{
+ struct xf_netdev *xf;
+
+ LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+ struct xf_netdev_port *port;
+ LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ netdev_recv_wait(port->netdev);
+ }
+ }
+}
+
+
+/* Modify or add an 802.1Q header in 'packet' according to 'a'. */
+static void
+xf_netdev_set_dl_tci(struct ofpbuf *packet, struct xflow_key *key,
+ const struct xflow_action_dl_tci *a)
+{
+ struct vlan_eth_header *veh;
+
+ if (key->dl_tci) {
+ veh = packet->l2;
+ veh->veth_tci = (veh->veth_tci & ~a->mask) | a->tci;
+ } else {
+ /* Insert new 802.1Q header. */
+ struct eth_header *eh = packet->l2;
+ struct vlan_eth_header tmp;
+ memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN);
+ memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN);
+ tmp.veth_type = htons(ETH_TYPE_VLAN);
+ tmp.veth_tci = a->tci;
+ tmp.veth_next_type = eh->eth_type;
+
+ veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN);
+ memcpy(veh, &tmp, sizeof tmp);
+ packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN;
+ }
+
+ key->dl_tci = veh->veth_tci | htons(XFLOW_TCI_PRESENT);
+}
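
/* A worked example of the TCI encoding handled above.  This assumes the usual
 * 802.1Q layout (PCP in the top three bits, VID in the low twelve) and that
 * XFLOW_TCI_PRESENT is the CFI bit, reused to mean "tag present" so that a
 * zero dl_tci can stand for "no 802.1Q header": */
static uint16_t
make_tci_sketch(int vid, int pcp)
{
    /* E.g. vid 42, pcp 5: (5 << 13) | CFI | 42 == 0xb02a before htons(). */
    return htons((pcp << VLAN_PCP_SHIFT) | XFLOW_TCI_PRESENT
                 | (vid & VLAN_VID_MASK));
}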
+
+static void
+xf_netdev_strip_vlan(struct ofpbuf *packet, struct xflow_key *key)
+{
+ struct vlan_eth_header *veh = packet->l2;
+ if (veh->veth_type == htons(ETH_TYPE_VLAN)) {
+ struct eth_header tmp;
+
+ memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN);
+ memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN);
+ tmp.eth_type = veh->veth_next_type;
+
+ packet->size -= VLAN_HEADER_LEN;
+ packet->data = (char*)packet->data + VLAN_HEADER_LEN;
+ packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN;
+ memcpy(packet->data, &tmp, sizeof tmp);
+
+ key->dl_tci = htons(0);
+ }
+}
+
+static void
+xf_netdev_set_dl_src(struct ofpbuf *packet, struct xflow_key *key,
+ const uint8_t dl_addr[ETH_ADDR_LEN])
+{
+ struct eth_header *eh = packet->l2;
+ memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src);
+ memcpy(key->dl_src, dl_addr, sizeof key->dl_src);
+}
+
+static void
+xf_netdev_set_dl_dst(struct ofpbuf *packet, struct xflow_key *key,
+ const uint8_t dl_addr[ETH_ADDR_LEN])
+{
+ struct eth_header *eh = packet->l2;
+ memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst);
+ memcpy(key->dl_dst, dl_addr, sizeof key->dl_dst);
+}
+
+static void
+xf_netdev_set_nw_addr(struct ofpbuf *packet, struct xflow_key *key,
+ const struct xflow_action_nw_addr *a)
+{
+ if (key->dl_type == htons(ETH_TYPE_IP)) {
+ struct ip_header *nh = packet->l3;
+ uint32_t *field;
+
+ field = a->type == XFLOWAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst;
+ if (key->nw_proto == IP_TYPE_TCP) {
+ struct tcp_header *th = packet->l4;
+ th->tcp_csum = recalc_csum32(th->tcp_csum, *field, a->nw_addr);
+ } else if (key->nw_proto == IP_TYPE_UDP) {
+ struct udp_header *uh = packet->l4;
+ if (uh->udp_csum) {
+ uh->udp_csum = recalc_csum32(uh->udp_csum, *field, a->nw_addr);
+ if (!uh->udp_csum) {
+ uh->udp_csum = 0xffff;
+ }
+ }
+ }
+ nh->ip_csum = recalc_csum32(nh->ip_csum, *field, a->nw_addr);
+ *field = a->nw_addr;
+
+ if (a->type == XFLOWAT_SET_NW_SRC) {
+ key->nw_src = a->nw_addr;
+ } else {
+ key->nw_dst = a->nw_addr;
+ }
+ }
+}
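
/* The checksum adjustments above are the incremental update from RFC 1624,
 * HC' = ~(~HC + ~m + m'); it is also why a recomputed UDP checksum of zero is
 * written back as 0xffff, since zero means "no checksum" in UDP.  A sketch of
 * the 16-bit case (recalc_csum16()/recalc_csum32() are the library versions;
 * this only illustrates the arithmetic): */
static uint16_t
recalc_csum16_sketch(uint16_t old_csum, uint16_t old_u16, uint16_t new_u16)
{
    uint32_t sum = (uint16_t) ~old_csum + (uint16_t) ~old_u16 + new_u16;

    sum = (sum & 0xffff) + (sum >> 16);   /* Fold the carry back in. */
    sum = (sum & 0xffff) + (sum >> 16);   /* At most one carry remains. */
    return (uint16_t) ~sum;
}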
+
+static void
+xf_netdev_set_nw_tos(struct ofpbuf *packet, struct xflow_key *key,
+ const struct xflow_action_nw_tos *a)
+{
+ if (key->dl_type == htons(ETH_TYPE_IP)) {
+ struct ip_header *nh = packet->l3;
+ uint8_t *field = &nh->ip_tos;
+
+ /* Set the DSCP bits and preserve the ECN bits. */
+ uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK);
+
+ nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field),
+ htons((uint16_t)new));
+ *field = new;
+ key->nw_tos = a->nw_tos;
+ }
+}
+
+static void
+xf_netdev_set_tp_port(struct ofpbuf *packet, struct xflow_key *key,
+ const struct xflow_action_tp_port *a)
+{
+ if (key->dl_type == htons(ETH_TYPE_IP)) {
+ uint16_t *field;
+ if (key->nw_proto == IPPROTO_TCP) {
+ struct tcp_header *th = packet->l4;
+ field = a->type == XFLOWAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst;
+ th->tcp_csum = recalc_csum16(th->tcp_csum, *field, a->tp_port);
+ *field = a->tp_port;
+ } else if (key->nw_proto == IPPROTO_UDP) {
+ struct udp_header *uh = packet->l4;
+ field = a->type == XFLOWAT_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst;
+ uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port);
+ *field = a->tp_port;
+ } else {
+ return;
+ }
+
+ if (a->type == XFLOWAT_SET_TP_SRC) {
+ key->tp_src = a->tp_port;
+ } else {
+ key->tp_dst = a->tp_port;
+ }
+ }
+}
+
+static void
+xf_netdev_output_port(struct xf_netdev *xf, struct ofpbuf *packet,
+ uint16_t out_port)
+{
+ struct xf_netdev_port *p = xf->ports[out_port];
+ if (p) {
+ netdev_send(p->netdev, packet);
+ }
+}
+
+static void
+xf_netdev_output_group(struct xf_netdev *xf, uint16_t group, uint16_t in_port,
+ struct ofpbuf *packet)
+{
+ struct xflow_port_group *g = &xf->groups[group];
+ int i;
+
+ for (i = 0; i < g->n_ports; i++) {
+ uint16_t out_port = g->ports[i];
+ if (out_port != in_port) {
+ xf_netdev_output_port(xf, packet, out_port);
+ }
+ }
+}
+
+static int
+xf_netdev_output_control(struct xf_netdev *xf, const struct ofpbuf *packet,
+ int queue_no, int port_no, uint32_t arg)
+{
+ struct ovs_queue *q = &xf->queues[queue_no];
+ struct xflow_msg *header;
+ struct ofpbuf *msg;
+ size_t msg_size;
+
+ if (q->n >= MAX_QUEUE_LEN) {
+ xf->n_lost++;
+ return ENOBUFS;
+ }
+
+ msg_size = sizeof *header + packet->size;
- msg = ofpbuf_new(msg_size);
++ msg = ofpbuf_new(msg_size + XFIF_RECV_MSG_PADDING);
++ ofpbuf_reserve(msg, XFIF_RECV_MSG_PADDING);
+ header = ofpbuf_put_uninit(msg, sizeof *header);
+ header->type = queue_no;
+ header->length = msg_size;
+ header->port = port_no;
+ header->arg = arg;
+ ofpbuf_put(msg, packet->data, packet->size);
+ queue_push_tail(q, msg);
+
+ return 0;
+}
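
/* Each message queued above ends up with the layout
 *
 *     [XFIF_RECV_MSG_PADDING headroom][struct xflow_msg][packet data]
 *
 * so a consumer popping it from the queue can walk it like this (a sketch;
 * the field meanings follow their use in this function): */
static void
read_queued_msg_sketch(const struct ofpbuf *msg)
{
    const struct xflow_msg *hdr = msg->data;
    const void *payload = hdr + 1;
    size_t payload_len = hdr->length - sizeof *hdr;

    /* 'hdr->type' is the queue number (e.g. _XFLOWL_MISS_NR), 'hdr->port' the
     * ingress port, and 'hdr->arg' the XFLOWAT_CONTROLLER argument. */
    (void) payload;
    (void) payload_len;
}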
+
+static int
+xf_netdev_execute_actions(struct xf_netdev *xf,
+ struct ofpbuf *packet, struct xflow_key *key,
+ const union xflow_action *actions, int n_actions)
+{
+ int i;
+ for (i = 0; i < n_actions; i++) {
+ const union xflow_action *a = &actions[i];
+
+ switch (a->type) {
+ case XFLOWAT_OUTPUT:
+ xf_netdev_output_port(xf, packet, a->output.port);
+ break;
+
+ case XFLOWAT_OUTPUT_GROUP:
+ xf_netdev_output_group(xf, a->output_group.group, key->in_port,
+ packet);
+ break;
+
+ case XFLOWAT_CONTROLLER:
+ xf_netdev_output_control(xf, packet, _XFLOWL_ACTION_NR,
+ key->in_port, a->controller.arg);
+ break;
+
+ case XFLOWAT_SET_DL_TCI:
+ xf_netdev_set_dl_tci(packet, key, &a->dl_tci);
+ break;
+
+ case XFLOWAT_STRIP_VLAN:
+ xf_netdev_strip_vlan(packet, key);
+ break;
+
+ case XFLOWAT_SET_DL_SRC:
+ xf_netdev_set_dl_src(packet, key, a->dl_addr.dl_addr);
+ break;
+
+ case XFLOWAT_SET_DL_DST:
+ xf_netdev_set_dl_dst(packet, key, a->dl_addr.dl_addr);
+ break;
+
+ case XFLOWAT_SET_NW_SRC:
+ case XFLOWAT_SET_NW_DST:
+ xf_netdev_set_nw_addr(packet, key, &a->nw_addr);
+ break;
+
+ case XFLOWAT_SET_NW_TOS:
+ xf_netdev_set_nw_tos(packet, key, &a->nw_tos);
+ break;
+
+ case XFLOWAT_SET_TP_SRC:
+ case XFLOWAT_SET_TP_DST:
+ xf_netdev_set_tp_port(packet, key, &a->tp_port);
+ break;
+ }
+ }
+ return 0;
+}
+
+const struct xfif_class xfif_netdev_class = {
+ "netdev",
+ xf_netdev_run,
+ xf_netdev_wait,
+ NULL, /* enumerate */
+ xfif_netdev_open,
+ xfif_netdev_close,
+ NULL, /* get_all_names */
+ xfif_netdev_destroy,
+ xfif_netdev_get_stats,
+ xfif_netdev_get_drop_frags,
+ xfif_netdev_set_drop_frags,
+ xfif_netdev_port_add,
+ xfif_netdev_port_del,
+ xfif_netdev_port_query_by_number,
+ xfif_netdev_port_query_by_name,
+ xfif_netdev_port_list,
+ xfif_netdev_port_poll,
+ xfif_netdev_port_poll_wait,
+ xfif_netdev_port_group_get,
+ xfif_netdev_port_group_set,
+ xfif_netdev_flow_get,
+ xfif_netdev_flow_put,
+ xfif_netdev_flow_del,
+ xfif_netdev_flow_flush,
+ xfif_netdev_flow_list,
+ xfif_netdev_execute,
+ xfif_netdev_recv_get_mask,
+ xfif_netdev_recv_set_mask,
+ NULL, /* get_sflow_probability */
+ NULL, /* set_sflow_probability */
+ xfif_netdev_recv,
+ xfif_netdev_recv_wait,
+};
* datapath. */
#include <assert.h>
-#include "dpif.h"
+ #include "openflow/openflow.h"
+#include "xfif.h"
+ #include "util.h"
#ifdef __cplusplus
extern "C" {
* 'probability' is expressed as the number of packets out of UINT_MAX to
* sample, e.g. probability/UINT_MAX is the probability of sampling a given
* packet. */
- int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability);
+ int (*set_sflow_probability)(struct xfif *xfif, uint32_t probability);
- /* Attempts to receive a message from 'dpif'. If successful, stores the
+ /* Attempts to receive a message from 'xfif'. If successful, stores the
* message into '*packetp'. The message, if one is received, must begin
- * with 'struct xflow_msg' as a header. Only messages of the types
- * selected with the recv_set_mask member function should be received.
- * with 'struct odp_msg' as a header, and must have at least
- * DPIF_RECV_MSG_PADDING bytes of headroom (allocated using
++ * with 'struct xflow_msg' as a header, and must have at least
++ * XFIF_RECV_MSG_PADDING bytes of headroom (allocated using
+ * e.g. ofpbuf_reserve()). Only messages of the types selected with the
+ * set_listen_mask member function should be received.
*
* This function must not block. If no message is ready to be received
* when it is called, it should return EAGAIN without blocking. */
- int (*recv)(struct dpif *dpif, struct ofpbuf **packetp);
+ int (*recv)(struct xfif *xfif, struct ofpbuf **packetp);
- /* Arranges for the poll loop to wake up when 'dpif' has a message queued
+ /* Arranges for the poll loop to wake up when 'xfif' has a message queued
* to be received with the recv member function. */
- void (*recv_wait)(struct dpif *dpif);
+ void (*recv_wait)(struct xfif *xfif);
};
+ /* Minimum number of bytes of headroom for a packet returned by the 'recv'
- * member function (see above). This headroom allows "struct odp_msg" to be
++ * member function (see above). This headroom allows "struct xflow_msg" to be
+ * replaced by "struct ofp_packet_in" without copying the buffer. */
-#define DPIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \
- - sizeof(struct odp_msg))
++#define XFIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \
++ - sizeof(struct xflow_msg))
-BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct odp_msg));
++BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct xflow_msg));
-BUILD_ASSERT_DECL(DPIF_RECV_MSG_PADDING % 4 == 0);
++BUILD_ASSERT_DECL(XFIF_RECV_MSG_PADDING % 4 == 0);
-extern const struct dpif_class dpif_linux_class;
-extern const struct dpif_class dpif_netdev_class;
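
/* The padding lets the receive path turn a queued message into an OpenFlow
 * packet-in without copying the payload: pull off the xflow_msg header, then
 * push the larger ofp_packet_in into the reserved headroom.  A sketch,
 * assuming the stock ofpbuf_pull()/ofpbuf_push_zeros() helpers and leaving
 * the header fields for the caller to fill in: */
static struct ofp_packet_in *
convert_to_packet_in_sketch(struct ofpbuf *buf)
{
    struct xflow_msg *msg = ofpbuf_pull(buf, sizeof *msg);

    (void) msg;   /* 'msg->port' etc. would be copied into the result here. */
    return ofpbuf_push_zeros(buf, offsetof(struct ofp_packet_in, data));
}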
+
+extern const struct xfif_class xfif_linux_class;
+extern const struct xfif_class xfif_netdev_class;
#ifdef __cplusplus
}
* we really need to see them. */
static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
-/* Not really much point in logging many dpif errors. */
+/* Not really much point in logging many xfif errors. */
- static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
+ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
-static void log_operation(const struct dpif *, const char *operation,
+static void log_operation(const struct xfif *, const char *operation,
int error);
-static void log_flow_operation(const struct dpif *, const char *operation,
- int error, struct odp_flow *flow);
-static void log_flow_put(struct dpif *, int error,
- const struct odp_flow_put *);
+static void log_flow_operation(const struct xfif *, const char *operation,
+ int error, struct xflow_flow *flow);
+static void log_flow_put(struct xfif *, int error,
+ const struct xflow_flow_put *);
static bool should_log_flow_message(int error);
-static void check_rw_odp_flow(struct odp_flow *);
+static void check_rw_xflow_flow(struct xflow_flow *);
static void
-dp_initialize(void)
+xf_initialize(void)
{
static int status = -1;
if (!error) {
error = flow->stats.error;
}
+ if (error) {
+ /* Make the results predictable on error. */
+ memset(&flow->stats, 0, sizeof flow->stats);
+ flow->n_actions = 0;
+ }
if (should_log_flow_message(error)) {
- log_flow_operation(dpif, "flow_get", error, flow);
+ log_flow_operation(xfif, "flow_get", error, flow);
}
return error;
}
return error;
}
-/* Attempts to receive a message from 'dpif'. If successful, stores the
+/* Attempts to receive a message from 'xfif'. If successful, stores the
* message into '*packetp'. The message, if one is received, will begin with
- * 'struct xflow_msg' as a header. Only messages of the types selected with
- * 'struct odp_msg' as a header, and will have at least DPIF_RECV_MSG_PADDING
++ * 'struct xflow_msg' as a header, and will have at least XFIF_RECV_MSG_PADDING
+ * bytes of headroom. Only messages of the types selected with
- * dpif_set_listen_mask() will ordinarily be received (but if a message type is
+ * xfif_set_listen_mask() will ordinarily be received (but if a message type is
* enabled and then later disabled, some stragglers might pop up).
*
* Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
* if no message is immediately available. */
int
-dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
+xfif_recv(struct xfif *xfif, struct ofpbuf **packetp)
{
- int error = dpif->dpif_class->recv(dpif, packetp);
+ int error = xfif->xfif_class->recv(xfif, packetp);
if (!error) {
+ struct ofpbuf *buf = *packetp;
+
- assert(ofpbuf_headroom(buf) >= DPIF_RECV_MSG_PADDING);
++ assert(ofpbuf_headroom(buf) >= XFIF_RECV_MSG_PADDING);
if (VLOG_IS_DBG_ENABLED()) {
- struct ofpbuf *buf = *packetp;
- struct odp_msg *msg = buf->data;
+ struct xflow_msg *msg = buf->data;
void *payload = msg + 1;
size_t payload_len = buf->size - sizeof *msg;
char *s = ofp_packet_to_string(payload, payload_len, payload_len);
--- /dev/null
- ds_put_format(ds, "in_port%04x", key->in_port);
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "xflow-util.h"
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include "coverage.h"
+#include "dynamic-string.h"
+#include "flow.h"
+#include "packets.h"
+#include "timeval.h"
+#include "util.h"
+
+union xflow_action *
+xflow_actions_add(struct xflow_actions *actions, uint16_t type)
+{
+ union xflow_action *a;
+ if (actions->n_actions < MAX_XFLOW_ACTIONS) {
+ a = &actions->actions[actions->n_actions++];
+ } else {
+ COVERAGE_INC(xflow_overflow);
+ actions->n_actions = MAX_XFLOW_ACTIONS + 1;
+ a = &actions->actions[MAX_XFLOW_ACTIONS - 1];
+ }
+ memset(a, 0, sizeof *a);
+ a->type = type;
+ return a;
+}
+
+void
+format_xflow_key(struct ds *ds, const struct xflow_key *key)
+{
- ds_put_format(ds, "***bad action %"PRIu16"***", a->type);
++ ds_put_format(ds, "tunnel%"PRIx32":in_port%04x",
++ key->tun_id, key->in_port);
+ if (key->dl_tci) {
+ ds_put_format(ds, ":vlan%"PRIu16":pcp%d",
+ vlan_tci_to_vid(key->dl_tci),
+ vlan_tci_to_pcp(key->dl_tci));
+ }
+ ds_put_format(ds, " mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" type%04x "
+ "proto%"PRId8" tos%"PRIu8" ip"IP_FMT"->"IP_FMT" port%d->%d",
+ ETH_ADDR_ARGS(key->dl_src), ETH_ADDR_ARGS(key->dl_dst),
+ ntohs(key->dl_type), key->nw_proto, key->nw_tos,
+ IP_ARGS(&key->nw_src), IP_ARGS(&key->nw_dst),
+ ntohs(key->tp_src), ntohs(key->tp_dst));
+}
+
+void
+format_xflow_action(struct ds *ds, const union xflow_action *a)
+{
+ switch (a->type) {
+ case XFLOWAT_OUTPUT:
+ ds_put_format(ds, "%"PRIu16, a->output.port);
+ break;
+ case XFLOWAT_OUTPUT_GROUP:
+ ds_put_format(ds, "g%"PRIu16, a->output_group.group);
+ break;
+ case XFLOWAT_CONTROLLER:
+ ds_put_format(ds, "ctl(%"PRIu32")", a->controller.arg);
+ break;
++ case XFLOWAT_SET_TUNNEL:
++ ds_put_format(ds, "set_tunnel(0x%08"PRIx32")", ntohl(a->tunnel.tun_id));
++ break;
+ case XFLOWAT_SET_DL_TCI:
+ ds_put_format(ds, "set_tci(%04"PRIx16",mask=%04"PRIx16")",
+ ntohs(a->dl_tci.tci), ntohs(a->dl_tci.mask));
+ break;
+ case XFLOWAT_STRIP_VLAN:
+ ds_put_format(ds, "strip_vlan");
+ break;
+ case XFLOWAT_SET_DL_SRC:
+ ds_put_format(ds, "set_dl_src("ETH_ADDR_FMT")",
+ ETH_ADDR_ARGS(a->dl_addr.dl_addr));
+ break;
+ case XFLOWAT_SET_DL_DST:
+ ds_put_format(ds, "set_dl_dst("ETH_ADDR_FMT")",
+ ETH_ADDR_ARGS(a->dl_addr.dl_addr));
+ break;
+ case XFLOWAT_SET_NW_SRC:
+ ds_put_format(ds, "set_nw_src("IP_FMT")",
+ IP_ARGS(&a->nw_addr.nw_addr));
+ break;
+ case XFLOWAT_SET_NW_DST:
+ ds_put_format(ds, "set_nw_dst("IP_FMT")",
+ IP_ARGS(&a->nw_addr.nw_addr));
+ break;
+ case XFLOWAT_SET_NW_TOS:
+ ds_put_format(ds, "set_nw_tos(%"PRIu8")", a->nw_tos.nw_tos);
+ break;
+ case XFLOWAT_SET_TP_SRC:
+ ds_put_format(ds, "set_tp_src(%"PRIu16")", ntohs(a->tp_port.tp_port));
+ break;
+ case XFLOWAT_SET_TP_DST:
+ ds_put_format(ds, "set_tp_dst(%"PRIu16")", ntohs(a->tp_port.tp_port));
+ break;
+ default:
- ds_put_format(ds, "***bad action %"PRIu16"***", a->type);
++ ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type);
+ break;
+ }
+}
+
+void
+format_xflow_actions(struct ds *ds, const union xflow_action *actions,
+ size_t n_actions)
+{
+ size_t i;
+ for (i = 0; i < n_actions; i++) {
+ if (i) {
+ ds_put_char(ds, ',');
+ }
+ format_xflow_action(ds, &actions[i]);
+ }
+ if (!n_actions) {
+ ds_put_cstr(ds, "drop");
+ }
+}
+
+void
+format_xflow_flow_stats(struct ds *ds, const struct xflow_flow_stats *s)
+{
+ ds_put_format(ds, "packets:%llu, bytes:%llu, used:",
+ (unsigned long long int) s->n_packets,
+ (unsigned long long int) s->n_bytes);
+ if (s->used_sec) {
+ long long int used = s->used_sec * 1000 + s->used_nsec / 1000000;
+ ds_put_format(ds, "%.3fs", (time_msec() - used) / 1000.0);
+ } else {
+ ds_put_format(ds, "never");
+ }
+}
+
+void
+format_xflow_flow(struct ds *ds, const struct xflow_flow *f)
+{
+ format_xflow_key(ds, &f->key);
+ ds_put_cstr(ds, ", ");
+ format_xflow_flow_stats(ds, &f->stats);
+ ds_put_cstr(ds, ", actions:");
+ format_xflow_actions(ds, f->actions, f->n_actions);
+}
+\f
+void
+xflow_key_from_flow(struct xflow_key *key, const struct flow *flow)
+{
++ key->tun_id = flow->tun_id;
+ key->nw_src = flow->nw_src;
- key->nw_dst = ofp_port_to_xflow_port(flow->nw_dst);
++ key->nw_dst = flow->nw_dst;
- key->in_port = flow->in_port;
++ key->in_port = ofp_port_to_xflow_port(flow->in_port);
+ if (flow->dl_vlan == htons(OFP_VLAN_NONE)) {
+ key->dl_tci = htons(0);
+ } else {
+ uint16_t vid = flow->dl_vlan & htons(VLAN_VID_MASK);
+ uint16_t pcp = htons((flow->dl_vlan_pcp << VLAN_PCP_SHIFT)
+ & VLAN_PCP_MASK);
+ key->dl_tci = vid | pcp | htons(XFLOW_TCI_PRESENT);
+ }
+ key->dl_type = flow->dl_type;
+ key->tp_src = flow->tp_src;
+ key->tp_dst = flow->tp_dst;
+ memcpy(key->dl_src, flow->dl_src, ETH_ALEN);
+ memcpy(key->dl_dst, flow->dl_dst, ETH_ALEN);
+ key->nw_proto = flow->nw_proto;
+ key->nw_tos = flow->nw_tos;
+}
+
+void
+xflow_key_to_flow(const struct xflow_key *key, struct flow *flow)
+{
+ flow->wildcards = 0;
+ flow->priority = 0xffff;
++ flow->tun_id = key->tun_id;
+ flow->nw_src = key->nw_src;
+ flow->nw_dst = key->nw_dst;
+ flow->in_port = xflow_port_to_ofp_port(key->in_port);
+ if (key->dl_tci) {
+ flow->dl_vlan = htons(vlan_tci_to_vid(key->dl_tci));
+ flow->dl_vlan_pcp = vlan_tci_to_pcp(key->dl_tci);
+ } else {
+ flow->dl_vlan = htons(OFP_VLAN_NONE);
+ flow->dl_vlan_pcp = 0;
+ }
+ flow->dl_type = key->dl_type;
+ flow->tp_src = key->tp_src;
+ flow->tp_dst = key->tp_dst;
+ memcpy(flow->dl_src, key->dl_src, ETH_ALEN);
+ memcpy(flow->dl_dst, key->dl_dst, ETH_ALEN);
+ flow->nw_proto = key->nw_proto;
+ flow->nw_tos = key->nw_tos;
+}
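
/* The two converters above are inverses on the fields they carry (modulo the
 * wildcards and priority defaults that xflow_key_to_flow() resets), so a
 * canonical key survives a round trip.  A sketch: */
static void
round_trip_sketch(const struct xflow_key *key)
{
    struct flow flow;
    struct xflow_key key2;

    xflow_key_to_flow(key, &flow);
    xflow_key_from_flow(&key2, &flow);
    assert(key2.in_port == key->in_port);
    assert(key2.dl_tci == key->dl_tci);
}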
struct discovery;
struct settings;
struct switch_status;
+struct wdp;
int discovery_create(const char *accept_controller_re, bool update_resolv_conf,
- struct dpif *, struct switch_status *,
+ struct wdp *, struct switch_status *,
struct discovery **);
void discovery_destroy(struct discovery *);
+ bool discovery_get_update_resolv_conf(const struct discovery *);
void discovery_set_update_resolv_conf(struct discovery *,
bool update_resolv_conf);
+ const char *discovery_get_accept_controller_re(const struct discovery *);
int discovery_set_accept_controller_re(struct discovery *, const char *re);
void discovery_question_connectivity(struct discovery *);
bool discovery_run(struct discovery *, char **controller_name);
fo->next_bogus_packet_in = LLONG_MAX;
memset(&flow, 0, sizeof flow);
- flow.wildcards = OFPFW_ALL;
- ofproto_delete_flow(fo->ofproto, &flow, OVSFW_ALL, FAIL_OPEN_PRIORITY);
++ flow.wildcards = OVSFW_ALL;
+ flow.priority = FAIL_OPEN_PRIORITY;
+ ofproto_delete_flow(fo->ofproto, &flow);
}
}
action.output.len = htons(sizeof action);
action.output.port = htons(OFPP_NORMAL);
memset(&flow, 0, sizeof flow);
- flow.wildcards = OFPFW_ALL;
- ofproto_add_flow(fo->ofproto, &flow, OVSFW_ALL, FAIL_OPEN_PRIORITY,
- &action, 1, 0);
++ flow.wildcards = OVSFW_ALL;
+ flow.priority = FAIL_OPEN_PRIORITY;
+ ofproto_add_flow(fo->ofproto, &flow, &action, 1, 0);
}
}
#include <string.h>
#include <stdlib.h>
#include "dhcp.h"
-#include "dpif.h"
#include "flow.h"
- #include "mac-learning.h"
#include "netdev.h"
- #include "xflow-util.h"
- #include "ofp-print.h"
-#include "odp-util.h"
#include "ofproto.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
- #include "openvswitch/xflow.h"
#include "packets.h"
#include "poll-loop.h"
- #include "rconn.h"
#include "status.h"
#include "timeval.h"
- #include "vconn.h"
+#include "wdp.h"
- #include "xfif.h"
#define THIS_MODULE VLM_in_band
#include "vlog.h"
* gateway.
*/
- #define IB_BASE_PRIORITY 18181800
-
+ /* Priorities used in classifier for in-band rules. These values are higher
+ * than any that may be set with OpenFlow, and "18" kind of looks like "IB".
+ * The ordering of priorities is not important because all of the rules set up
+ * by in-band control have the same action. The only reason to use more than
+ * one priority is to make the kind of flow easier to see during debugging. */
enum {
- IBR_FROM_LOCAL_DHCP, /* (a) From local port, DHCP. */
+ /* One set per bridge. */
+ IBR_FROM_LOCAL_DHCP = 180000, /* (a) From local port, DHCP. */
IBR_TO_LOCAL_ARP, /* (b) To local port, ARP. */
IBR_FROM_LOCAL_ARP, /* (c) From local port, ARP. */
- IBR_TO_REMOTE_ARP, /* (d) To remote MAC, ARP. */
- IBR_FROM_REMOTE_ARP, /* (e) From remote MAC, ARP. */
- IBR_TO_CTL_ARP, /* (f) To controller IP, ARP. */
- IBR_FROM_CTL_ARP, /* (g) From controller IP, ARP. */
- IBR_TO_CTL_OFP, /* (h) To controller, OpenFlow port. */
- IBR_FROM_CTL_OFP, /* (i) From controller, OpenFlow port. */
- #if OFP_TCP_PORT != OFP_SSL_PORT
- #error Need to support separate TCP and SSL flows.
- #endif
- N_IB_RULES
+
+ /* One set per unique next-hop MAC. */
+ IBR_TO_NEXT_HOP_ARP, /* (d) To remote MAC, ARP. */
+ IBR_FROM_NEXT_HOP_ARP, /* (e) From remote MAC, ARP. */
+
+ /* One set per unique remote IP address. */
+ IBR_TO_REMOTE_ARP, /* (f) To remote IP, ARP. */
+ IBR_FROM_REMOTE_ARP, /* (g) From remote IP, ARP. */
+
+ /* One set per unique remote (IP,port) pair. */
+ IBR_TO_REMOTE_TCP, /* (h) To remote IP, TCP port. */
+ IBR_FROM_REMOTE_TCP /* (i) From remote IP, TCP port. */
};
- struct ib_rule {
- bool installed;
+ struct in_band_rule {
flow_t flow;
- uint32_t wildcards;
- unsigned int priority;
};
+ /* Track one remote IP and next hop information. */
+ struct in_band_remote {
+ struct sockaddr_in remote_addr; /* IP address, in network byte order. */
+ uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. */
+ uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */
+ struct netdev *remote_netdev; /* Device to send to next-hop MAC. */
+ };
+
struct in_band {
struct ofproto *ofproto;
- struct rconn *controller;
struct status_category *ss_cat;
- /* Keep track of local port's information. */
- uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */
- struct netdev *local_netdev; /* Local port's network device. */
- time_t next_local_refresh;
-
- /* Keep track of controller and next hop's information. */
- uint32_t controller_ip; /* Controller IP, 0 if unknown. */
- uint8_t remote_mac[ETH_ADDR_LEN]; /* Remote MAC. */
- struct netdev *remote_netdev;
- uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous remote MAC. */
- time_t next_remote_refresh;
-
- /* Rules that we set up. */
- struct ib_rule rules[N_IB_RULES];
+ /* Remote information. */
+ time_t next_remote_refresh; /* Refresh timer. */
+ struct in_band_remote *remotes;
+ size_t n_remotes;
+
+ /* Local information. */
+ time_t next_local_refresh; /* Refresh timer. */
+ uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */
+ struct netdev *local_netdev; /* Local port's network device. */
+
+ /* Local and remote addresses that are installed as flows. */
+ uint8_t installed_local_mac[ETH_ADDR_LEN];
+ struct sockaddr_in *remote_addrs;
+ size_t n_remote_addrs;
+ uint8_t *remote_macs;
+ size_t n_remote_macs;
};
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
}
}
-/* Returns true if 'packet' should be sent to the local port regardless
- * of the flow table. */
-bool
-in_band_msg_in_hook(struct in_band *in_band, const flow_t *flow,
- const struct ofpbuf *packet)
-{
- if (!in_band) {
- return false;
- }
-
- /* Regardless of how the flow table is configured, we want to be
- * able to see replies to our DHCP requests. */
- if (flow->dl_type == htons(ETH_TYPE_IP)
- && flow->nw_proto == IP_TYPE_UDP
- && flow->tp_src == htons(DHCP_SERVER_PORT)
- && flow->tp_dst == htons(DHCP_CLIENT_PORT)
- && packet->l7) {
- struct dhcp_header *dhcp;
-
- dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data,
- sizeof *dhcp);
- if (!dhcp) {
- return false;
- }
-
- refresh_local(in_band);
- if (!eth_addr_is_zero(in_band->local_mac)
- && eth_addr_equals(dhcp->chaddr, in_band->local_mac)) {
- return true;
- }
- }
-
- return false;
-}
-
-/* Returns true if the rule that would match 'flow' with 'actions' is
- * allowed to be set up in the datapath. */
-bool
-in_band_rule_check(struct in_band *in_band, const flow_t *flow,
- const struct odp_actions *actions)
-{
- if (!in_band) {
- return true;
- }
-
- /* Don't allow flows that would prevent DHCP replies from being seen
- * by the local port. */
- if (flow->dl_type == htons(ETH_TYPE_IP)
- && flow->nw_proto == IP_TYPE_UDP
- && flow->tp_src == htons(DHCP_SERVER_PORT)
- && flow->tp_dst == htons(DHCP_CLIENT_PORT)) {
- int i;
-
- for (i=0; i<actions->n_actions; i++) {
- if (actions->actions[i].output.type == ODPAT_OUTPUT
- && actions->actions[i].output.port == ODPP_LOCAL) {
- return true;
- }
- }
- return false;
- }
-
- return true;
-}
-
static void
- drop_flow(struct in_band *in_band, int rule_idx)
+ init_rule(struct in_band_rule *rule, unsigned int priority)
+ {
- rule->wildcards = OVSFW_ALL;
- rule->priority = priority;
-
- /* Not strictly necessary but seems cleaner. */
++ /* Clearing the flow is not strictly necessary but it seems cleaner. */
+ memset(&rule->flow, 0, sizeof rule->flow);
++
++ rule->flow.wildcards = OVSFW_ALL;
++ rule->flow.priority = priority;
+ }
+
+ static void
-set_in_port(struct in_band_rule *rule, uint16_t odp_port)
++set_in_port(struct in_band_rule *rule, uint16_t ofp_port)
{
- struct ib_rule *rule = &in_band->rules[rule_idx];
- rule->wildcards &= ~OFPFW_IN_PORT;
- rule->flow.in_port = odp_port;
++ rule->flow.wildcards &= ~OFPFW_IN_PORT;
++ rule->flow.in_port = ofp_port;
+ }
- if (rule->installed) {
- rule->installed = false;
- ofproto_delete_flow(in_band->ofproto, &rule->flow);
+ static void
+ set_dl_type(struct in_band_rule *rule, uint16_t dl_type)
+ {
- rule->wildcards &= ~OFPFW_DL_TYPE;
++ rule->flow.wildcards &= ~OFPFW_DL_TYPE;
+ rule->flow.dl_type = dl_type;
+ }
+
+ static void
+ set_dl_src(struct in_band_rule *rule, const uint8_t dl_src[ETH_ADDR_LEN])
+ {
- rule->wildcards &= ~OFPFW_DL_SRC;
++ rule->flow.wildcards &= ~OFPFW_DL_SRC;
+ memcpy(rule->flow.dl_src, dl_src, ETH_ADDR_LEN);
+ }
+
+ static void
+ set_dl_dst(struct in_band_rule *rule, const uint8_t dl_dst[ETH_ADDR_LEN])
+ {
- rule->wildcards &= ~OFPFW_DL_DST;
++ rule->flow.wildcards &= ~OFPFW_DL_DST;
+ memcpy(rule->flow.dl_dst, dl_dst, ETH_ADDR_LEN);
+ }
+
+ static void
+ set_tp_src(struct in_band_rule *rule, uint16_t tp_src)
+ {
- rule->wildcards &= ~OFPFW_TP_SRC;
++ rule->flow.wildcards &= ~OFPFW_TP_SRC;
+ rule->flow.tp_src = tp_src;
+ }
+
+ static void
+ set_tp_dst(struct in_band_rule *rule, uint16_t tp_dst)
+ {
- rule->wildcards &= ~OFPFW_TP_DST;
++ rule->flow.wildcards &= ~OFPFW_TP_DST;
+ rule->flow.tp_dst = tp_dst;
+ }
+
+ static void
+ set_nw_proto(struct in_band_rule *rule, uint8_t nw_proto)
+ {
- rule->wildcards &= ~OFPFW_NW_PROTO;
++ rule->flow.wildcards &= ~OFPFW_NW_PROTO;
+ rule->flow.nw_proto = nw_proto;
+ }
+
+ static void
+ set_nw_src(struct in_band_rule *rule, const struct in_addr nw_src)
+ {
- rule->wildcards &= ~OFPFW_NW_SRC_MASK;
++ rule->flow.wildcards &= ~OFPFW_NW_SRC_MASK;
+ rule->flow.nw_src = nw_src.s_addr;
+ }
+
+ static void
+ set_nw_dst(struct in_band_rule *rule, const struct in_addr nw_dst)
+ {
- rule->wildcards &= ~OFPFW_NW_DST_MASK;
++ rule->flow.wildcards &= ~OFPFW_NW_DST_MASK;
+ rule->flow.nw_dst = nw_dst.s_addr;
+ }
+
+ static void
+ make_rules(struct in_band *ib,
+ void (*cb)(struct in_band *, const struct in_band_rule *))
+ {
+ struct in_band_rule rule;
+ size_t i;
+
+ if (!eth_addr_is_zero(ib->installed_local_mac)) {
+ /* (a) Allow DHCP requests sent from the local port. */
+ init_rule(&rule, IBR_FROM_LOCAL_DHCP);
- set_in_port(&rule, ODPP_LOCAL);
++ set_in_port(&rule, OFPP_LOCAL);
+ set_dl_type(&rule, htons(ETH_TYPE_IP));
+ set_dl_src(&rule, ib->installed_local_mac);
+ set_nw_proto(&rule, IP_TYPE_UDP);
+ set_tp_src(&rule, htons(DHCP_CLIENT_PORT));
+ set_tp_dst(&rule, htons(DHCP_SERVER_PORT));
+ cb(ib, &rule);
+
+ /* (b) Allow ARP replies to the local port's MAC address. */
+ init_rule(&rule, IBR_TO_LOCAL_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_dl_dst(&rule, ib->installed_local_mac);
+ set_nw_proto(&rule, ARP_OP_REPLY);
+ cb(ib, &rule);
+
+ /* (c) Allow ARP requests from the local port's MAC address. */
+ init_rule(&rule, IBR_FROM_LOCAL_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_dl_src(&rule, ib->installed_local_mac);
+ set_nw_proto(&rule, ARP_OP_REQUEST);
+ cb(ib, &rule);
+ }
+
+ for (i = 0; i < ib->n_remote_macs; i++) {
+ const uint8_t *remote_mac = &ib->remote_macs[i * ETH_ADDR_LEN];
+
+ if (i > 0) {
+ const uint8_t *prev_mac = &ib->remote_macs[(i - 1) * ETH_ADDR_LEN];
+ if (eth_addr_equals(remote_mac, prev_mac)) {
+ /* Skip duplicates. */
+ continue;
+ }
+ }
+
+ /* (d) Allow ARP replies to the next hop's MAC address. */
+ init_rule(&rule, IBR_TO_NEXT_HOP_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_dl_dst(&rule, remote_mac);
+ set_nw_proto(&rule, ARP_OP_REPLY);
+ cb(ib, &rule);
+
+ /* (e) Allow ARP requests from the next hop's MAC address. */
+ init_rule(&rule, IBR_FROM_NEXT_HOP_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_dl_src(&rule, remote_mac);
+ set_nw_proto(&rule, ARP_OP_REQUEST);
+ cb(ib, &rule);
+ }
+
+ for (i = 0; i < ib->n_remote_addrs; i++) {
+ const struct sockaddr_in *a = &ib->remote_addrs[i];
+
+ if (!i || a->sin_addr.s_addr != a[-1].sin_addr.s_addr) {
+ /* (f) Allow ARP replies containing the remote's IP address as a
+ * target. */
+ init_rule(&rule, IBR_TO_REMOTE_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_nw_proto(&rule, ARP_OP_REPLY);
+ set_nw_dst(&rule, a->sin_addr);
+ cb(ib, &rule);
+
+ /* (g) Allow ARP requests containing the remote's IP address as a
+ * source. */
+ init_rule(&rule, IBR_FROM_REMOTE_ARP);
+ set_dl_type(&rule, htons(ETH_TYPE_ARP));
+ set_nw_proto(&rule, ARP_OP_REQUEST);
+ set_nw_src(&rule, a->sin_addr);
+ cb(ib, &rule);
+ }
+
+ if (!i
+ || a->sin_addr.s_addr != a[-1].sin_addr.s_addr
+ || a->sin_port != a[-1].sin_port) {
+ /* (h) Allow TCP traffic to the remote's IP and port. */
+ init_rule(&rule, IBR_TO_REMOTE_TCP);
+ set_dl_type(&rule, htons(ETH_TYPE_IP));
+ set_nw_proto(&rule, IP_TYPE_TCP);
+ set_nw_dst(&rule, a->sin_addr);
+ set_tp_dst(&rule, a->sin_port);
+ cb(ib, &rule);
+
+ /* (i) Allow TCP traffic from the remote's IP and port. */
+ init_rule(&rule, IBR_FROM_REMOTE_TCP);
+ set_dl_type(&rule, htons(ETH_TYPE_IP));
+ set_nw_proto(&rule, IP_TYPE_TCP);
+ set_nw_src(&rule, a->sin_addr);
+ set_tp_src(&rule, a->sin_port);
+ cb(ib, &rule);
+ }
}
}
- /* out_port and fixed_fields are assumed never to change. */
static void
- set_up_flow(struct in_band *in_band, int rule_idx, const flow_t *flow,
- uint32_t fixed_fields, uint16_t out_port)
+ drop_rule(struct in_band *ib, const struct in_band_rule *rule)
{
- struct ib_rule *rule = &in_band->rules[rule_idx];
- ofproto_delete_flow(ib->ofproto, &rule->flow,
- rule->wildcards, rule->priority);
++ ofproto_delete_flow(ib->ofproto, &rule->flow);
+ }
- if (!rule->installed || memcmp(flow, &rule->flow, sizeof *flow)) {
- union ofp_action action;
+ /* Drops from the flow table all of the flows set up by 'ib', then clears out
+ * the information about the installed flows so that they can be filled in
+ * again if necessary. */
+ static void
+ drop_rules(struct in_band *ib)
+ {
+ /* Drop rules. */
+ make_rules(ib, drop_rule);
+
+ /* Clear out state. */
+ memset(ib->installed_local_mac, 0, sizeof ib->installed_local_mac);
+
+ free(ib->remote_addrs);
+ ib->remote_addrs = NULL;
+ ib->n_remote_addrs = 0;
+
+ free(ib->remote_macs);
+ ib->remote_macs = NULL;
+ ib->n_remote_macs = 0;
+ }
+
+ static void
+ add_rule(struct in_band *ib, const struct in_band_rule *rule)
+ {
+ union ofp_action action;
- drop_flow(in_band, rule_idx);
+ action.type = htons(OFPAT_OUTPUT);
+ action.output.len = htons(sizeof action);
+ action.output.port = htons(OFPP_NORMAL);
+ action.output.max_len = htons(0);
- ofproto_add_flow(ib->ofproto, &rule->flow, rule->wildcards,
- rule->priority, &action, 1, 0);
++ ofproto_add_flow(ib->ofproto, &rule->flow, &action, 1, 0);
+ }
- rule->installed = true;
- rule->flow = *flow;
- rule->flow.wildcards = OFPFW_ALL & ~fixed_fields;
- rule->flow.priority = IB_BASE_PRIORITY + (N_IB_RULES - rule_idx);
+ /* Inserts flows into the flow table for the current state of 'ib'. */
+ static void
+ add_rules(struct in_band *ib)
+ {
+ make_rules(ib, add_rule);
+ }
- action.type = htons(OFPAT_OUTPUT);
- action.output.len = htons(sizeof action);
- action.output.port = htons(out_port);
- action.output.max_len = htons(0);
- ofproto_add_flow(in_band->ofproto, &rule->flow, &action, 1, 0);
+ static int
+ compare_addrs(const void *a_, const void *b_)
+ {
+ const struct sockaddr_in *a = a_;
+ const struct sockaddr_in *b = b_;
+ int cmp;
+
+ cmp = memcmp(&a->sin_addr.s_addr,
+ &b->sin_addr.s_addr,
+ sizeof a->sin_addr.s_addr);
+ if (cmp) {
+ return cmp;
}
+ return memcmp(&a->sin_port, &b->sin_port, sizeof a->sin_port);
+ }
+
+ static int
+ compare_macs(const void *a, const void *b)
+ {
+ return memcmp(a, b, ETH_ADDR_LEN);
}
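
/* These comparators exist so that the remote address and MAC arrays can be
 * kept sorted; that is what lets make_rules() skip duplicates by checking
 * only the previous element.  The presumed call sites, when the arrays are
 * rebuilt (a sketch): */
static void
sort_remotes_sketch(struct in_band *ib)
{
    qsort(ib->remote_addrs, ib->n_remote_addrs, sizeof *ib->remote_addrs,
          compare_addrs);
    qsort(ib->remote_macs, ib->n_remote_macs, ETH_ADDR_LEN, compare_macs);
}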
void
}
int
-in_band_create(struct ofproto *ofproto, struct dpif *dpif,
+in_band_create(struct ofproto *ofproto, struct wdp *wdp,
- struct switch_status *ss, struct rconn *controller,
- struct in_band **in_bandp)
+ struct switch_status *ss, struct in_band **in_bandp)
{
struct in_band *in_band;
- char *local_name;
- char local_name[IF_NAMESIZE];
struct netdev *local_netdev;
++ char *local_name;
int error;
- error = dpif_port_get_name(dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
+ error = wdp_port_get_name(wdp, OFPP_LOCAL, &local_name);
if (error) {
VLOG_ERR("failed to initialize in-band control: cannot get name "
"of datapath local port (%s)", strerror(error));
struct rconn;
struct settings;
struct switch_status;
+struct wdp;
-int in_band_create(struct ofproto *, struct dpif *, struct switch_status *,
+int in_band_create(struct ofproto *, struct wdp *, struct switch_status *,
- struct rconn *controller, struct in_band **);
+ struct in_band **);
void in_band_destroy(struct in_band *);
+
+ void in_band_set_remotes(struct in_band *,
+ const struct sockaddr_in *, size_t n);
+
void in_band_run(struct in_band *);
void in_band_wait(struct in_band *);
- const struct odp_actions *);
+
+ bool in_band_msg_in_hook(struct in_band *, const flow_t *,
+ const struct ofpbuf *packet);
+ bool in_band_rule_check(struct in_band *, const flow_t *,
++ const struct xflow_actions *);
void in_band_flushed(struct in_band *);
#endif /* in-band.h */
ofproto_sflow_options_destroy(os->options);
os->options = NULL;
- PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) {
- ofproto_sflow_del_port(os, xflow_port);
- }
- port_array_clear(&os->ports);
-
/* Turn off sampling to save CPU cycles. */
- dpif_set_sflow_probability(os->dpif, 0);
+ wdp_set_sflow_probability(os->wdp, 0);
}
bool
ofproto_sflow_destroy(struct ofproto_sflow *os)
{
if (os) {
- unsigned int odp_port;
+ struct ofproto_sflow_port *osp;
++ unsigned int xflow_port;
+
ofproto_sflow_clear(os);
- PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
- ofproto_sflow_del_port(os, odp_port);
++ PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) {
++ ofproto_sflow_del_port(os, xflow_port);
+ }
port_array_destroy(&os->ports);
free(os);
}
osp->netdev = netdev;
ifindex = netdev_get_ifindex(netdev);
if (ifindex <= 0) {
- ifindex = (os->sflow_agent->subId << 16) + odp_port;
+ ifindex = (os->sflow_agent->subId << 16) + xflow_port;
}
SFL_DS_SET(osp->dsi, 0, ifindex, 0);
- port_array_set(&os->ports, odp_port, osp);
+ port_array_set(&os->ports, xflow_port, osp);
- /* Add poller. */
+ /* Add poller and sampler. */
if (os->sflow_agent) {
- ofproto_sflow_add_poller(os, osp, odp_port);
+ ofproto_sflow_add_poller(os, osp, xflow_port);
+ ofproto_sflow_add_sampler(os, osp);
}
}
sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
/* Set the sampling_rate down in the datapath. */
- dpif_set_sflow_probability(os->dpif,
- MAX(1, UINT32_MAX / options->sampling_rate));
+ wdp_set_sflow_probability(os->wdp,
+ MAX(1, UINT32_MAX / options->sampling_rate));
/* Add samplers and pollers for the currently known ports. */
- PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
- ofproto_sflow_add_poller(os, osp, odp_port);
+ PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) {
- ofproto_sflow_add_sampler(os, osp,
- options->sampling_rate, options->header_len);
++ ofproto_sflow_add_poller(os, osp, xflow_port);
+ ofproto_sflow_add_sampler(os, osp);
}
}
n_actions, msg->length, min_size);
return;
}
- actions = (const union odp_action *) (hdr + 1);
+ actions = (const union xflow_action *) (hdr + 1);
/* Get packet payload and extract flow. */
- payload.data = (union odp_action *) (actions + n_actions);
+ payload.data = (union xflow_action *) (actions + n_actions);
payload.size = msg->length - min_size;
- flow_extract(&payload, msg->port, &flow);
+ flow_extract(&payload, 0, msg->port, &flow);
/* Build a flow sample */
memset(&fs, 0, sizeof fs);
return false;
}
-static struct rule *rule_create(struct ofproto *, struct rule *super,
- const union ofp_action *, size_t n_actions,
- uint16_t idle_timeout, uint16_t hard_timeout,
- uint64_t flow_cookie, bool send_flow_removed);
-static void rule_free(struct rule *);
-static void rule_destroy(struct ofproto *, struct rule *);
-static struct rule *rule_from_cls_rule(const struct cls_rule *);
-static void rule_insert(struct ofproto *, struct rule *,
- struct ofpbuf *packet, uint16_t in_port);
-static void rule_remove(struct ofproto *, struct rule *);
-static bool rule_make_actions(struct ofproto *, struct rule *,
- const struct ofpbuf *packet);
-static void rule_install(struct ofproto *, struct rule *,
- struct rule *displaced_rule);
-static void rule_uninstall(struct ofproto *, struct rule *);
-static void rule_post_uninstall(struct ofproto *, struct rule *);
-static void send_flow_removed(struct ofproto *p, struct rule *rule,
- long long int now, uint8_t reason);
+static void delete_flow(struct ofproto *, struct wdp_rule *, uint8_t reason);
- struct ofconn {
- struct list node;
- struct rconn *rconn;
- struct pktbuf *pktbuf;
- int miss_send_len;
-
- struct rconn_packet_counter *packet_in_counter;
+ /* ofproto supports two kinds of OpenFlow connections:
+ *
+ * - "Controller connections": Connections to ordinary OpenFlow controllers.
+ * ofproto maintains persistent connections to these controllers and by
+ * default sends them asynchronous messages such as packet-ins.
+ *
+ * - "Transient connections", e.g. from ovs-ofctl. When these connections
+ * drop, it is the other side's responsibility to reconnect them if
+ * necessary. ofproto does not send them asynchronous messages by default.
+ */
+ enum ofconn_type {
+ OFCONN_CONTROLLER, /* An OpenFlow controller. */
+ OFCONN_TRANSIENT /* A transient connection. */
+ };
- /* Number of OpenFlow messages queued as replies to OpenFlow requests, and
- * the maximum number before we stop reading OpenFlow requests. */
+ /* An OpenFlow connection. */
+ struct ofconn {
+ struct ofproto *ofproto; /* The ofproto that owns this connection. */
+ struct list node; /* In struct ofproto's "all_conns" list. */
+ struct rconn *rconn; /* OpenFlow connection. */
+ enum ofconn_type type; /* Type. */
+
+ /* OFPT_PACKET_IN related data. */
+ struct rconn_packet_counter *packet_in_counter; /* # queued on 'rconn'. */
+ struct pinsched *schedulers[2]; /* Indexed by reason code; see below. */
+ struct pktbuf *pktbuf; /* OpenFlow packet buffers. */
+ int miss_send_len; /* Bytes to send of buffered packets. */
+
+ /* Number of OpenFlow messages queued on 'rconn' as replies to OpenFlow
+ * requests, and the maximum number before we stop reading OpenFlow
+ * requests. */
#define OFCONN_REPLY_MAX 100
struct rconn_packet_counter *reply_counter;
+
+ /* type == OFCONN_CONTROLLER only. */
+ enum nx_role role; /* Role. */
+ struct hmap_node hmap_node; /* In struct ofproto's "controllers" map. */
+ struct discovery *discovery; /* Controller discovery object, if enabled. */
+ struct status_category *ss; /* Switch status category. */
+ enum ofproto_band band; /* In-band or out-of-band? */
};
- static struct ofconn *ofconn_create(struct ofproto *, struct rconn *);
+ /* We use OFPR_NO_MATCH and OFPR_ACTION as indexes into struct ofconn's
+ * "schedulers" array. Their values are 0 and 1, and their meanings and values
- * coincide with _ODPL_MISS_NR and _ODPL_ACTION_NR, so this is convenient. In
++ * coincide with WDP_CHAN_MISS and WDP_CHAN_ACTION, so this is convenient. In
+ * case anything ever changes, check their values here. */
+ #define N_SCHEDULERS 2
+ BUILD_ASSERT_DECL(OFPR_NO_MATCH == 0);
-BUILD_ASSERT_DECL(OFPR_NO_MATCH == _ODPL_MISS_NR);
++BUILD_ASSERT_DECL(OFPR_NO_MATCH == WDP_CHAN_MISS);
+ BUILD_ASSERT_DECL(OFPR_ACTION == 1);
-BUILD_ASSERT_DECL(OFPR_ACTION == _ODPL_ACTION_NR);
++BUILD_ASSERT_DECL(OFPR_ACTION == WDP_CHAN_ACTION);
+
+ static struct ofconn *ofconn_create(struct ofproto *, struct rconn *,
+ enum ofconn_type);
static void ofconn_destroy(struct ofconn *);
static void ofconn_run(struct ofconn *, struct ofproto *);
static void ofconn_wait(struct ofconn *);
static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn,
struct rconn_packet_counter *counter);
-static void send_packet_in(struct ofproto *, struct ofpbuf *odp_msg);
-static void do_send_packet_in(struct ofpbuf *odp_msg, void *ofconn);
++static void send_packet_in(struct ofproto *, struct wdp_packet *);
++static void do_send_packet_in(struct wdp_packet *, void *ofconn);
+
struct ofproto {
/* Settings. */
uint64_t datapath_id; /* Datapath ID. */
/* Configuration. */
struct switch_status *switch_status;
- struct status_category *ss_cat;
- struct in_band *in_band;
- struct discovery *discovery;
struct fail_open *fail_open;
- struct pinsched *miss_sched, *action_sched;
struct netflow *netflow;
struct ofproto_sflow *sflow;
++ bool tun_id_from_cookie;
+
+ /* In-band control. */
+ struct in_band *in_band;
+ long long int next_in_band_update;
+ struct sockaddr_in *extra_in_band_remotes;
+ size_t n_extra_remotes;
- /* Flow table. */
- struct classifier cls;
- bool need_revalidate;
- long long int next_expiration;
- struct tag_set revalidate_set;
- bool tun_id_from_cookie;
-
/* OpenFlow connections. */
- struct list all_conns;
- struct ofconn *controller;
+ struct hmap controllers; /* Controller "struct ofconn"s. */
+ struct list all_conns; /* Contains "struct ofconn"s. */
struct pvconn **listeners;
size_t n_listeners;
struct pvconn **snoops;
static uint64_t pick_datapath_id(const struct ofproto *);
static uint64_t pick_fallback_dpid(void);
- static void send_packet_in_miss(struct wdp_packet *, void *ofproto);
- static void send_packet_in_action(struct wdp_packet *, void *ofproto);
-static void update_used(struct ofproto *);
-static void update_stats(struct ofproto *, struct rule *,
- const struct odp_flow_stats *);
-static void expire_rule(struct cls_rule *, void *ofproto);
-static void active_timeout(struct ofproto *ofproto, struct rule *rule);
-static bool revalidate_rule(struct ofproto *p, struct rule *rule);
-static void revalidate_cb(struct cls_rule *rule_, void *p_);
-
-static void handle_odp_msg(struct ofproto *, struct ofpbuf *);
+static void handle_wdp_packet(struct ofproto *, struct wdp_packet *);
static void handle_openflow(struct ofconn *, struct ofproto *,
struct ofpbuf *);
p->netflow = NULL;
p->sflow = NULL;
- /* Initialize flow table. */
- classifier_init(&p->cls);
- p->need_revalidate = false;
- p->next_expiration = time_msec() + 1000;
- tag_set_init(&p->revalidate_set);
-
/* Initialize OpenFlow connections. */
list_init(&p->all_conns);
- p->controller = ofconn_create(p, rconn_create(5, 8));
- p->controller->pktbuf = pktbuf_create();
- p->controller->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+ hmap_init(&p->controllers);
p->listeners = NULL;
p->n_listeners = 0;
p->snoops = NULL;
uint64_t old_dpid = p->datapath_id;
p->datapath_id = datapath_id ? datapath_id : pick_datapath_id(p);
if (p->datapath_id != old_dpid) {
+ struct ofconn *ofconn;
+
VLOG_INFO("datapath ID changed to %016"PRIx64, p->datapath_id);
- rconn_reconnect(p->controller->rconn);
+
+ /* Force all active connections to reconnect, since there is no way to
+ * notify a controller that the datapath ID has changed. */
+ LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+ rconn_reconnect(ofconn->rconn);
+ }
+ }
+ }
+
+ static bool
+ is_discovery_controller(const struct ofproto_controller *c)
+ {
+ return !strcmp(c->target, "discover");
+ }
+
+ static bool
+ is_in_band_controller(const struct ofproto_controller *c)
+ {
+ return is_discovery_controller(c) || c->band == OFPROTO_IN_BAND;
+ }
+
+ /* Creates a new controller in 'ofproto'. Some of the settings are initially
+ * drawn from 'c', but update_controller() needs to be called later to finish
+ * the new ofconn's configuration. */
+ static void
+ add_controller(struct ofproto *ofproto, const struct ofproto_controller *c)
+ {
+ struct discovery *discovery;
+ struct ofconn *ofconn;
+
+ if (is_discovery_controller(c)) {
+ int error = discovery_create(c->accept_re, c->update_resolv_conf,
- ofproto->dpif, ofproto->switch_status,
++ ofproto->wdp, ofproto->switch_status,
+ &discovery);
+ if (error) {
+ return;
+ }
+ } else {
+ discovery = NULL;
+ }
+
+ ofconn = ofconn_create(ofproto, rconn_create(5, 8), OFCONN_CONTROLLER);
+ ofconn->pktbuf = pktbuf_create();
+ ofconn->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+ if (discovery) {
+ ofconn->discovery = discovery;
+ } else {
+ rconn_connect(ofconn->rconn, c->target);
+ }
+ hmap_insert(&ofproto->controllers, &ofconn->hmap_node,
+ hash_string(c->target, 0));
+ }
+
+ /* Reconfigures 'ofconn' to match 'c'. This function cannot update an ofconn's
+ * target or turn discovery on or off (these are done by creating new ofconns
+ * and deleting old ones), but it can update the rest of an ofconn's
+ * settings. */
+ static void
+ update_controller(struct ofconn *ofconn, const struct ofproto_controller *c)
+ {
+ struct ofproto *ofproto = ofconn->ofproto;
+ int probe_interval;
+ int i;
+
+ ofconn->band = (is_in_band_controller(c)
+ ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND);
+
+ rconn_set_max_backoff(ofconn->rconn, c->max_backoff);
+
+ probe_interval = c->probe_interval ? MAX(c->probe_interval, 5) : 0;
+ rconn_set_probe_interval(ofconn->rconn, probe_interval);
+
+ if (ofconn->discovery) {
+ discovery_set_update_resolv_conf(ofconn->discovery,
+ c->update_resolv_conf);
+ discovery_set_accept_controller_re(ofconn->discovery, c->accept_re);
+ }
+
+ for (i = 0; i < N_SCHEDULERS; i++) {
+ struct pinsched **s = &ofconn->schedulers[i];
+
+ if (c->rate_limit > 0) {
+ if (!*s) {
+ *s = pinsched_create(c->rate_limit, c->burst_limit,
+ ofproto->switch_status);
+ } else {
+ pinsched_set_limits(*s, c->rate_limit, c->burst_limit);
+ }
+ } else {
+ pinsched_destroy(*s);
+ *s = NULL;
+ }
+ }
+ }
+
+ static const char *
+ ofconn_get_target(const struct ofconn *ofconn)
+ {
+ return ofconn->discovery ? "discover" : rconn_get_name(ofconn->rconn);
+ }
+
+ static struct ofconn *
+ find_controller_by_target(struct ofproto *ofproto, const char *target)
+ {
+ struct ofconn *ofconn;
+
+ HMAP_FOR_EACH_WITH_HASH (ofconn, struct ofconn, hmap_node,
+ hash_string(target, 0), &ofproto->controllers) {
+ if (!strcmp(ofconn_get_target(ofconn), target)) {
+ return ofconn;
+ }
}
- in_band_create(ofproto, ofproto->dpif, ofproto->switch_status,
+ return NULL;
+ }
+
+ static void
+ update_in_band_remotes(struct ofproto *ofproto)
+ {
+ const struct ofconn *ofconn;
+ struct sockaddr_in *addrs;
+ size_t max_addrs, n_addrs;
+ bool discovery;
+ size_t i;
+
+ /* Allocate enough memory for as many remotes as we could possibly have. */
+ max_addrs = ofproto->n_extra_remotes + hmap_count(&ofproto->controllers);
+ addrs = xmalloc(max_addrs * sizeof *addrs);
+ n_addrs = 0;
+
+ /* Add all the remotes. */
+ discovery = false;
+ HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) {
+ struct sockaddr_in *sin = &addrs[n_addrs];
+
+ sin->sin_addr.s_addr = rconn_get_remote_ip(ofconn->rconn);
+ if (sin->sin_addr.s_addr) {
+ sin->sin_port = rconn_get_remote_port(ofconn->rconn);
+ n_addrs++;
+ }
+ if (ofconn->discovery) {
+ discovery = true;
+ }
+ }
+ for (i = 0; i < ofproto->n_extra_remotes; i++) {
+ addrs[n_addrs++] = ofproto->extra_in_band_remotes[i];
+ }
+
+ /* Create or update or destroy in-band.
+ *
+ * Ordinarily we only enable in-band if there's at least one remote
+ * address, but discovery needs the in-band rules for DHCP to be installed
+ * even before we know any remote addresses. */
+ if (n_addrs || discovery) {
+ if (!ofproto->in_band) {
++ in_band_create(ofproto, ofproto->wdp, ofproto->switch_status,
+ &ofproto->in_band);
+ }
+ in_band_set_remotes(ofproto->in_band, addrs, n_addrs);
+ ofproto->next_in_band_update = time_msec() + 1000;
+ } else {
+ in_band_destroy(ofproto->in_band);
+ ofproto->in_band = NULL;
+ }
+
+ /* Clean up. */
+ free(addrs);
}
void
return;
}
- /* Destroy fail-open early, because it touches the classifier. */
- ofproto_set_failure(p, false);
+ /* Destroy fail-open and in-band early, since they touch the classifier. */
+ fail_open_destroy(p->fail_open);
+ p->fail_open = NULL;
+
+ in_band_destroy(p->in_band);
+ p->in_band = NULL;
+ free(p->extra_in_band_remotes);
ofproto_flush_flows(p);
- classifier_destroy(&p->cls);
LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
&p->all_conns) {
ofconn_destroy(ofconn);
}
+ hmap_destroy(&p->controllers);
- dpif_close(p->dpif);
- netdev_monitor_destroy(p->netdev_monitor);
- PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
- ofport_free(ofport);
- }
- shash_destroy(&p->port_by_name);
+ wdp_close(p->wdp);
switch_status_destroy(p->switch_status);
- in_band_destroy(p->in_band);
- discovery_destroy(p->discovery);
- pinsched_destroy(p->miss_sched);
- pinsched_destroy(p->action_sched);
netflow_destroy(p->netflow);
ofproto_sflow_destroy(p->sflow);
return error;
}
-static void
-process_port_change(struct ofproto *ofproto, int error, char *devname)
-{
- if (error == ENOBUFS) {
- reinit_ports(ofproto);
- } else if (!error) {
- update_port(ofproto, devname);
- free(devname);
- }
-}
-
+ /* Returns a "preference level" for snooping 'ofconn'. A higher return value
+ * means that 'ofconn' is more interesting for monitoring than a lower return
+ * value. */
+ static int
+ snoop_preference(const struct ofconn *ofconn)
+ {
+ switch (ofconn->role) {
+ case NX_ROLE_MASTER:
+ return 3;
+ case NX_ROLE_OTHER:
+ return 2;
+ case NX_ROLE_SLAVE:
+ return 1;
+ default:
+ /* Shouldn't happen. */
+ return 0;
+ }
+ }
+
+ /* One of ofproto's "snoop" pvconns has accepted a new connection on 'vconn'.
+ * Connects this vconn to a controller. */
+ static void
+ add_snooper(struct ofproto *ofproto, struct vconn *vconn)
+ {
+ struct ofconn *ofconn, *best;
+
+ /* Pick a controller for monitoring. */
+ best = NULL;
+ LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ if (ofconn->type == OFCONN_CONTROLLER
+ && (!best || snoop_preference(ofconn) > snoop_preference(best))) {
+ best = ofconn;
+ }
+ }
+
+ if (best) {
+ rconn_add_monitor(best->rconn, vconn);
+ } else {
+ VLOG_INFO_RL(&rl, "no controller connection to snoop");
+ vconn_close(vconn);
+ }
+ }
+
int
ofproto_run1(struct ofproto *p)
{
}
}
\f
-static void
-reinit_ports(struct ofproto *p)
-{
- struct svec devnames;
- struct ofport *ofport;
- unsigned int port_no;
- struct odp_port *odp_ports;
- size_t n_odp_ports;
- size_t i;
-
- svec_init(&devnames);
- PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
- svec_add (&devnames, (char *) ofport->opp.name);
- }
- dpif_port_list(p->dpif, &odp_ports, &n_odp_ports);
- for (i = 0; i < n_odp_ports; i++) {
- svec_add (&devnames, odp_ports[i].devname);
- }
- free(odp_ports);
-
- svec_sort_unique(&devnames);
- for (i = 0; i < devnames.n; i++) {
- update_port(p, devnames.names[i]);
- }
- svec_destroy(&devnames);
-}
-
-static size_t
-refresh_port_group(struct ofproto *p, unsigned int group)
-{
- uint16_t *ports;
- size_t n_ports;
- struct ofport *port;
- unsigned int port_no;
-
- assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD);
-
- ports = xmalloc(port_array_count(&p->ports) * sizeof *ports);
- n_ports = 0;
- PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
- if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) {
- ports[n_ports++] = port_no;
- }
- }
- dpif_port_group_set(p->dpif, group, ports, n_ports);
- free(ports);
-
- return n_ports;
-}
-
-static void
-refresh_port_groups(struct ofproto *p)
-{
- size_t n_flood = refresh_port_group(p, DP_GROUP_FLOOD);
- size_t n_all = refresh_port_group(p, DP_GROUP_ALL);
- if (p->sflow) {
- ofproto_sflow_set_group_sizes(p->sflow, n_flood, n_all);
- }
-}
-
-static struct ofport *
-make_ofport(const struct odp_port *odp_port)
-{
- struct netdev_options netdev_options;
- enum netdev_flags flags;
- struct ofport *ofport;
- struct netdev *netdev;
- bool carrier;
- int error;
-
- memset(&netdev_options, 0, sizeof netdev_options);
- netdev_options.name = odp_port->devname;
- netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
- netdev_options.may_open = true;
-
- error = netdev_open(&netdev_options, &netdev);
- if (error) {
- VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s "
- "cannot be opened (%s)",
- odp_port->devname, odp_port->port,
- odp_port->devname, strerror(error));
- return NULL;
- }
-
- ofport = xmalloc(sizeof *ofport);
- ofport->netdev = netdev;
- ofport->opp.port_no = odp_port_to_ofp_port(odp_port->port);
- netdev_get_etheraddr(netdev, ofport->opp.hw_addr);
- memcpy(ofport->opp.name, odp_port->devname,
- MIN(sizeof ofport->opp.name, sizeof odp_port->devname));
- ofport->opp.name[sizeof ofport->opp.name - 1] = '\0';
-
- netdev_get_flags(netdev, &flags);
- ofport->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN;
-
- netdev_get_carrier(netdev, &carrier);
- ofport->opp.state = carrier ? 0 : OFPPS_LINK_DOWN;
-
- netdev_get_features(netdev,
- &ofport->opp.curr, &ofport->opp.advertised,
- &ofport->opp.supported, &ofport->opp.peer);
- return ofport;
-}
-
-static bool
-ofport_conflicts(const struct ofproto *p, const struct odp_port *odp_port)
-{
- if (port_array_get(&p->ports, odp_port->port)) {
- VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
- odp_port->port);
- return true;
- } else if (shash_find(&p->port_by_name, odp_port->devname)) {
- VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath",
- odp_port->devname);
- return true;
- } else {
- return false;
- }
-}
-
-static int
-ofport_equal(const struct ofport *a_, const struct ofport *b_)
-{
- const struct ofp_phy_port *a = &a_->opp;
- const struct ofp_phy_port *b = &b_->opp;
-
- BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. */
- return (a->port_no == b->port_no
- && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr)
- && !strcmp((char *) a->name, (char *) b->name)
- && a->state == b->state
- && a->config == b->config
- && a->curr == b->curr
- && a->advertised == b->advertised
- && a->supported == b->supported
- && a->peer == b->peer);
-}
-
-static void
-send_port_status(struct ofproto *p, const struct ofport *ofport,
- uint8_t reason)
-{
- /* XXX Should limit the number of queued port status change messages. */
- struct ofconn *ofconn;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
- struct ofp_port_status *ops;
- struct ofpbuf *b;
-
- if (!ofconn_receives_async_msgs(ofconn)) {
- continue;
- }
-
- ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &b);
- ops->reason = reason;
- ops->desc = ofport->opp;
- hton_ofp_phy_port(&ops->desc);
- queue_tx(b, ofconn, NULL);
- }
- if (p->ofhooks->port_changed_cb) {
- p->ofhooks->port_changed_cb(reason, &ofport->opp, p->aux);
- }
-}
-
-static void
-ofport_install(struct ofproto *p, struct ofport *ofport)
-{
- uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
- const char *netdev_name = (const char *) ofport->opp.name;
-
- netdev_monitor_add(p->netdev_monitor, ofport->netdev);
- port_array_set(&p->ports, odp_port, ofport);
- shash_add(&p->port_by_name, netdev_name, ofport);
- if (p->sflow) {
- ofproto_sflow_add_port(p->sflow, odp_port, netdev_name);
- }
-}
-
-static void
-ofport_remove(struct ofproto *p, struct ofport *ofport)
-{
- uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
-
- netdev_monitor_remove(p->netdev_monitor, ofport->netdev);
- port_array_set(&p->ports, odp_port, NULL);
- shash_delete(&p->port_by_name,
- shash_find(&p->port_by_name, (char *) ofport->opp.name));
- if (p->sflow) {
- ofproto_sflow_del_port(p->sflow, odp_port);
- }
-}
-
-static void
-ofport_free(struct ofport *ofport)
-{
- if (ofport) {
- netdev_close(ofport->netdev);
- free(ofport);
- }
-}
-
-static void
-update_port(struct ofproto *p, const char *devname)
-{
- struct odp_port odp_port;
- struct ofport *old_ofport;
- struct ofport *new_ofport;
- int error;
-
- COVERAGE_INC(ofproto_update_port);
-
- /* Query the datapath for port information. */
- error = dpif_port_query_by_name(p->dpif, devname, &odp_port);
-
- /* Find the old ofport. */
- old_ofport = shash_find_data(&p->port_by_name, devname);
- if (!error) {
- if (!old_ofport) {
- /* There's no port named 'devname' but there might be a port with
- * the same port number. This could happen if a port is deleted
- * and then a new one added in its place very quickly, or if a port
- * is renamed. In the former case we want to send an OFPPR_DELETE
- * and an OFPPR_ADD, and in the latter case we want to send a
- * single OFPPR_MODIFY. We can distinguish the cases by comparing
- * the old port's ifindex against the new port, or perhaps less
- * reliably but more portably by comparing the old port's MAC
- * against the new port's MAC. However, this code isn't that smart
- * and always sends an OFPPR_MODIFY (XXX). */
- old_ofport = port_array_get(&p->ports, odp_port.port);
- }
- } else if (error != ENOENT && error != ENODEV) {
- VLOG_WARN_RL(&rl, "dpif_port_query_by_name returned unexpected error "
- "%s", strerror(error));
- return;
- }
-
- /* Create a new ofport. */
- new_ofport = !error ? make_ofport(&odp_port) : NULL;
-
- /* Eliminate a few pathological cases. */
- if (!old_ofport && !new_ofport) {
- return;
- } else if (old_ofport && new_ofport) {
- /* Most of the 'config' bits are OpenFlow soft state, but
- * OFPPC_PORT_DOWN is maintained by the kernel. So transfer the OpenFlow
- * bits from old_ofport. (make_ofport() only sets OFPPC_PORT_DOWN and
- * leaves the other bits 0.) */
- new_ofport->opp.config |= old_ofport->opp.config & ~OFPPC_PORT_DOWN;
-
- if (ofport_equal(old_ofport, new_ofport)) {
- /* False alarm--no change. */
- ofport_free(new_ofport);
- return;
- }
- }
-
- /* Now deal with the normal cases. */
- if (old_ofport) {
- ofport_remove(p, old_ofport);
- }
- if (new_ofport) {
- ofport_install(p, new_ofport);
- }
- send_port_status(p, new_ofport ? new_ofport : old_ofport,
- (!old_ofport ? OFPPR_ADD
- : !new_ofport ? OFPPR_DELETE
- : OFPPR_MODIFY));
- ofport_free(old_ofport);
-
- /* Update port groups. */
- refresh_port_groups(p);
-}
-
-static int
-init_ports(struct ofproto *p)
-{
- struct odp_port *ports;
- size_t n_ports;
- size_t i;
- int error;
-
- error = dpif_port_list(p->dpif, &ports, &n_ports);
- if (error) {
- return error;
- }
-
- for (i = 0; i < n_ports; i++) {
- const struct odp_port *odp_port = &ports[i];
- if (!ofport_conflicts(p, odp_port)) {
- struct ofport *ofport = make_ofport(odp_port);
- if (ofport) {
- ofport_install(p, ofport);
- }
- }
- }
- free(ports);
- refresh_port_groups(p);
- return 0;
-}
-\f
static struct ofconn *
- ofconn_create(struct ofproto *p, struct rconn *rconn)
+ ofconn_create(struct ofproto *p, struct rconn *rconn, enum ofconn_type type)
{
- struct ofconn *ofconn = xmalloc(sizeof *ofconn);
+ struct ofconn *ofconn = xzalloc(sizeof *ofconn);
+ ofconn->ofproto = p;
list_push_back(&p->all_conns, &ofconn->node);
ofconn->rconn = rconn;
+ ofconn->type = type;
+ ofconn->role = NX_ROLE_OTHER;
+ ofconn->packet_in_counter = rconn_packet_counter_create ();
ofconn->pktbuf = NULL;
ofconn->miss_send_len = 0;
- ofconn->packet_in_counter = rconn_packet_counter_create ();
ofconn->reply_counter = rconn_packet_counter_create ();
return ofconn;
}
COVERAGE_INC(ofproto_ofconn_stuck);
}
}
+
+ /* Returns true if 'ofconn' should receive asynchronous messages. */
+ static bool
+ ofconn_receives_async_msgs(const struct ofconn *ofconn)
+ {
+ if (ofconn->type == OFCONN_CONTROLLER) {
+ /* Ordinary controllers always get asynchronous messages unless they
+ * have configured themselves as "slaves". */
+ return ofconn->role != NX_ROLE_SLAVE;
+ } else {
+ /* Transient connections don't get asynchronous messages unless they
+ * have explicitly asked for them by setting a nonzero miss send
+ * length. */
+ return ofconn->miss_send_len > 0;
+ }
+ }
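Broadcast paths are expected to use this helper as a gate, the same check send_port_status() applied above. A minimal sketch of the intended call shape, where 'buf' is an already-composed asynchronous message:

    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
        if (!ofconn_receives_async_msgs(ofconn)) {
            continue;       /* Slaves and uninterested transients skip it. */
        }
        queue_tx(ofpbuf_clone(buf), ofconn, ofconn->reply_counter);
    }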
\f
-/* Caller is responsible for initializing the 'cr' member of the returned
- * rule. */
-static struct rule *
-rule_create(struct ofproto *ofproto, struct rule *super,
- const union ofp_action *actions, size_t n_actions,
- uint16_t idle_timeout, uint16_t hard_timeout,
- uint64_t flow_cookie, bool send_flow_removed)
-{
- struct rule *rule = xzalloc(sizeof *rule);
- rule->idle_timeout = idle_timeout;
- rule->hard_timeout = hard_timeout;
- rule->flow_cookie = flow_cookie;
- rule->used = rule->created = time_msec();
- rule->send_flow_removed = send_flow_removed;
- rule->super = super;
- if (super) {
- list_push_back(&super->list, &rule->list);
- } else {
- list_init(&rule->list);
- }
- rule->n_actions = n_actions;
- rule->actions = xmemdup(actions, n_actions * sizeof *actions);
- netflow_flow_clear(&rule->nf_flow);
- netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created);
-
- return rule;
-}
-
-static struct rule *
-rule_from_cls_rule(const struct cls_rule *cls_rule)
-{
- return cls_rule ? CONTAINER_OF(cls_rule, struct rule, cr) : NULL;
-}
-
-static void
-rule_free(struct rule *rule)
-{
- free(rule->actions);
- free(rule->odp_actions);
- free(rule);
-}
-
-/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its
- * super-rule's list of subrules. If 'rule' is a super-rule, also iterates
- * through all of its subrules and revalidates them, destroying any that no
- * longer has a super-rule (which is probably all of them).
- *
- * Before calling this function, the caller must make have removed 'rule' from
- * the classifier. If 'rule' is an exact-match rule, the caller is also
- * responsible for ensuring that it has been uninstalled from the datapath. */
-static void
-rule_destroy(struct ofproto *ofproto, struct rule *rule)
-{
- if (!rule->super) {
- struct rule *subrule, *next;
- LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) {
- revalidate_rule(ofproto, subrule);
- }
- } else {
- list_remove(&rule->list);
- }
- rule_free(rule);
-}
-
static bool
-rule_has_out_port(const struct rule *rule, uint16_t out_port)
+rule_has_out_port(const struct wdp_rule *rule, uint16_t out_port)
{
const union ofp_action *oa;
struct actions_iterator i;
handle_echo_request(struct ofconn *ofconn, struct ofp_header *oh)
{
struct ofp_header *rq = oh;
- queue_tx(make_echo_reply(rq), ofconn, ofconn->reply_counter);
- return 0;
-}
-
-static int
-handle_features_request(struct ofproto *p, struct ofconn *ofconn,
- struct ofp_header *oh)
-{
- struct ofp_switch_features *osf;
- struct ofpbuf *buf;
- unsigned int port_no;
- struct ofport *port;
-
- osf = make_openflow_xid(sizeof *osf, OFPT_FEATURES_REPLY, oh->xid, &buf);
- osf->datapath_id = htonll(p->datapath_id);
- osf->n_buffers = htonl(pktbuf_capacity());
- osf->n_tables = 2;
- osf->capabilities = htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS |
- OFPC_PORT_STATS | OFPC_ARP_MATCH_IP);
- osf->actions = htonl((1u << OFPAT_OUTPUT) |
- (1u << OFPAT_SET_VLAN_VID) |
- (1u << OFPAT_SET_VLAN_PCP) |
- (1u << OFPAT_STRIP_VLAN) |
- (1u << OFPAT_SET_DL_SRC) |
- (1u << OFPAT_SET_DL_DST) |
- (1u << OFPAT_SET_NW_SRC) |
- (1u << OFPAT_SET_NW_DST) |
- (1u << OFPAT_SET_NW_TOS) |
- (1u << OFPAT_SET_TP_SRC) |
- (1u << OFPAT_SET_TP_DST));
-
- PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
- hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
- }
-
- queue_tx(buf, ofconn, ofconn->reply_counter);
- return 0;
-}
-
-static int
-handle_get_config_request(struct ofproto *p, struct ofconn *ofconn,
- struct ofp_header *oh)
-{
- struct ofpbuf *buf;
- struct ofp_switch_config *osc;
- uint16_t flags;
- bool drop_frags;
-
- /* Figure out flags. */
- dpif_get_drop_frags(p->dpif, &drop_frags);
- flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
-
- /* Send reply. */
- osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf);
- osc->flags = htons(flags);
- osc->miss_send_len = htons(ofconn->miss_send_len);
- queue_tx(buf, ofconn, ofconn->reply_counter);
-
- return 0;
-}
-
-static int
-handle_set_config(struct ofproto *p, struct ofconn *ofconn,
- struct ofp_switch_config *osc)
-{
- uint16_t flags;
- int error;
-
- error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc);
- if (error) {
- return error;
- }
- flags = ntohs(osc->flags);
-
- if (ofconn->type == OFCONN_CONTROLLER && ofconn->role != NX_ROLE_SLAVE) {
- switch (flags & OFPC_FRAG_MASK) {
- case OFPC_FRAG_NORMAL:
- dpif_set_drop_frags(p->dpif, false);
- break;
- case OFPC_FRAG_DROP:
- dpif_set_drop_frags(p->dpif, true);
- break;
- default:
- VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
- osc->flags);
- break;
- }
- }
-
- ofconn->miss_send_len = ntohs(osc->miss_send_len);
-
- return 0;
-}
-
-static void
-add_output_group_action(struct odp_actions *actions, uint16_t group,
- uint16_t *nf_output_iface)
-{
- odp_actions_add(actions, ODPAT_OUTPUT_GROUP)->output_group.group = group;
-
- if (group == DP_GROUP_ALL || group == DP_GROUP_FLOOD) {
- *nf_output_iface = NF_OUT_FLOOD;
- }
-}
-
-static void
-add_controller_action(struct odp_actions *actions,
- const struct ofp_action_output *oao)
-{
- union odp_action *a = odp_actions_add(actions, ODPAT_CONTROLLER);
- a->controller.arg = ntohs(oao->max_len);
-}
-
-struct action_xlate_ctx {
- /* Input. */
- flow_t flow; /* Flow to which these actions correspond. */
- int recurse; /* Recursion level, via xlate_table_action. */
- struct ofproto *ofproto;
- const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a
- * null pointer if we are revalidating
- * without a packet to refer to. */
-
- /* Output. */
- struct odp_actions *out; /* Datapath actions. */
- tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */
- bool may_set_up_flow; /* True ordinarily; false if the actions must
- * be reassessed for every packet. */
- uint16_t nf_output_iface; /* Output interface index for NetFlow. */
-};
-
-static void do_xlate_actions(const union ofp_action *in, size_t n_in,
- struct action_xlate_ctx *ctx);
-
-static void
-add_output_action(struct action_xlate_ctx *ctx, uint16_t port)
-{
- const struct ofport *ofport = port_array_get(&ctx->ofproto->ports, port);
-
- if (ofport) {
- if (ofport->opp.config & OFPPC_NO_FWD) {
- /* Forwarding disabled on port. */
- return;
- }
- } else {
- /*
- * We don't have an ofport record for this port, but it doesn't hurt to
- * allow forwarding to it anyhow. Maybe such a port will appear later
- * and we're pre-populating the flow table.
- */
- }
-
- odp_actions_add(ctx->out, ODPAT_OUTPUT)->output.port = port;
- ctx->nf_output_iface = port;
-}
-
-static struct rule *
-lookup_valid_rule(struct ofproto *ofproto, const flow_t *flow)
-{
- struct rule *rule;
- rule = rule_from_cls_rule(classifier_lookup(&ofproto->cls, flow));
-
- /* The rule we found might not be valid, since we could be in need of
- * revalidation. If it is not valid, don't return it. */
- if (rule
- && rule->super
- && ofproto->need_revalidate
- && !revalidate_rule(ofproto, rule)) {
- COVERAGE_INC(ofproto_invalidated);
- return NULL;
- }
-
- return rule;
-}
-
-static void
-xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port)
-{
- if (!ctx->recurse) {
- uint16_t old_in_port;
- struct rule *rule;
-
- /* Look up a flow with 'in_port' as the input port. Then restore the
- * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
- * have surprising behavior). */
- old_in_port = ctx->flow.in_port;
- ctx->flow.in_port = in_port;
- rule = lookup_valid_rule(ctx->ofproto, &ctx->flow);
- ctx->flow.in_port = old_in_port;
-
- if (rule) {
- if (rule->super) {
- rule = rule->super;
- }
-
- ctx->recurse++;
- do_xlate_actions(rule->actions, rule->n_actions, ctx);
- ctx->recurse--;
- }
- }
-}
-
-static void
-xlate_output_action(struct action_xlate_ctx *ctx,
- const struct ofp_action_output *oao)
-{
- uint16_t odp_port;
- uint16_t prev_nf_output_iface = ctx->nf_output_iface;
-
- ctx->nf_output_iface = NF_OUT_DROP;
-
- switch (ntohs(oao->port)) {
- case OFPP_IN_PORT:
- add_output_action(ctx, ctx->flow.in_port);
- break;
- case OFPP_TABLE:
- xlate_table_action(ctx, ctx->flow.in_port);
- break;
- case OFPP_NORMAL:
- if (!ctx->ofproto->ofhooks->normal_cb(&ctx->flow, ctx->packet,
- ctx->out, ctx->tags,
- &ctx->nf_output_iface,
- ctx->ofproto->aux)) {
- COVERAGE_INC(ofproto_uninstallable);
- ctx->may_set_up_flow = false;
- }
- break;
- case OFPP_FLOOD:
- add_output_group_action(ctx->out, DP_GROUP_FLOOD,
- &ctx->nf_output_iface);
- break;
- case OFPP_ALL:
- add_output_group_action(ctx->out, DP_GROUP_ALL, &ctx->nf_output_iface);
- break;
- case OFPP_CONTROLLER:
- add_controller_action(ctx->out, oao);
- break;
- case OFPP_LOCAL:
- add_output_action(ctx, ODPP_LOCAL);
- break;
- default:
- odp_port = ofp_port_to_odp_port(ntohs(oao->port));
- if (odp_port != ctx->flow.in_port) {
- add_output_action(ctx, odp_port);
- }
- break;
- }
-
- if (prev_nf_output_iface == NF_OUT_FLOOD) {
- ctx->nf_output_iface = NF_OUT_FLOOD;
- } else if (ctx->nf_output_iface == NF_OUT_DROP) {
- ctx->nf_output_iface = prev_nf_output_iface;
- } else if (prev_nf_output_iface != NF_OUT_DROP &&
- ctx->nf_output_iface != NF_OUT_FLOOD) {
- ctx->nf_output_iface = NF_OUT_MULTI;
- }
-}
-
-static void
-xlate_nicira_action(struct action_xlate_ctx *ctx,
- const struct nx_action_header *nah)
-{
- const struct nx_action_resubmit *nar;
- const struct nx_action_set_tunnel *nast;
- union odp_action *oa;
- int subtype = ntohs(nah->subtype);
-
- assert(nah->vendor == htonl(NX_VENDOR_ID));
- switch (subtype) {
- case NXAST_RESUBMIT:
- nar = (const struct nx_action_resubmit *) nah;
- xlate_table_action(ctx, ofp_port_to_odp_port(ntohs(nar->in_port)));
- break;
-
- case NXAST_SET_TUNNEL:
- nast = (const struct nx_action_set_tunnel *) nah;
- oa = odp_actions_add(ctx->out, ODPAT_SET_TUNNEL);
- ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id;
- break;
-
- /* If you add a new action here that modifies flow data, don't forget to
- * update the flow key in ctx->flow at the same time. */
-
- default:
- VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype);
- break;
- }
+ queue_tx(make_echo_reply(rq), ofconn, ofconn->reply_counter);
+ return 0;
}
-static void
-do_xlate_actions(const union ofp_action *in, size_t n_in,
- struct action_xlate_ctx *ctx)
+static int
+handle_features_request(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_header *oh)
{
- struct actions_iterator iter;
- const union ofp_action *ia;
- const struct ofport *port;
-
- port = port_array_get(&ctx->ofproto->ports, ctx->flow.in_port);
- if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
- port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr)
- ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) {
- /* Drop this flow. */
- return;
- }
+ struct ofpbuf *features;
+ int error;
- for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) {
- uint16_t type = ntohs(ia->type);
- union odp_action *oa;
+ error = wdp_get_features(p->wdp, &features);
+ if (!error) {
+ struct ofp_switch_features *osf = features->data;
- switch (type) {
- case OFPAT_OUTPUT:
- xlate_output_action(ctx, &ia->output);
- break;
+ update_openflow_length(features);
+ osf->header.version = OFP_VERSION;
+ osf->header.type = OFPT_FEATURES_REPLY;
+ osf->header.xid = oh->xid;
- case OFPAT_SET_VLAN_VID:
- oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_VID);
- ctx->flow.dl_vlan = oa->vlan_vid.vlan_vid = ia->vlan_vid.vlan_vid;
- break;
+ osf->datapath_id = htonll(p->datapath_id);
+ osf->n_buffers = htonl(pktbuf_capacity());
+ memset(osf->pad, 0, sizeof osf->pad);
- case OFPAT_SET_VLAN_PCP:
- oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_PCP);
- ctx->flow.dl_vlan_pcp = oa->vlan_pcp.vlan_pcp = ia->vlan_pcp.vlan_pcp;
- break;
+ /* Turn on capabilities implemented by ofproto. */
+ osf->capabilities |= htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS |
+ OFPC_PORT_STATS);
- case OFPAT_STRIP_VLAN:
- odp_actions_add(ctx->out, ODPAT_STRIP_VLAN);
- ctx->flow.dl_vlan = OFP_VLAN_NONE;
- ctx->flow.dl_vlan_pcp = 0;
- break;
+ queue_tx(features, ofconn, ofconn->reply_counter);
+ }
+ return error;
+}
- case OFPAT_SET_DL_SRC:
- oa = odp_actions_add(ctx->out, ODPAT_SET_DL_SRC);
- memcpy(oa->dl_addr.dl_addr,
- ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
- memcpy(ctx->flow.dl_src,
- ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
- break;
+static int
+handle_get_config_request(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_header *oh)
+{
+ struct ofpbuf *buf;
+ struct ofp_switch_config *osc;
+ uint16_t flags;
+ bool drop_frags;
- case OFPAT_SET_DL_DST:
- oa = odp_actions_add(ctx->out, ODPAT_SET_DL_DST);
- memcpy(oa->dl_addr.dl_addr,
- ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
- memcpy(ctx->flow.dl_dst,
- ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
- break;
+ /* Figure out flags. */
+ wdp_get_drop_frags(p->wdp, &drop_frags);
+ flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
- case OFPAT_SET_NW_SRC:
- oa = odp_actions_add(ctx->out, ODPAT_SET_NW_SRC);
- ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
- break;
+ /* Send reply. */
+ osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf);
+ osc->flags = htons(flags);
+ osc->miss_send_len = htons(ofconn->miss_send_len);
+ queue_tx(buf, ofconn, ofconn->reply_counter);
- case OFPAT_SET_NW_DST:
- oa = odp_actions_add(ctx->out, ODPAT_SET_NW_DST);
- ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
- break;
+ return 0;
+}
- case OFPAT_SET_NW_TOS:
- oa = odp_actions_add(ctx->out, ODPAT_SET_NW_TOS);
- ctx->flow.nw_tos = oa->nw_tos.nw_tos = ia->nw_tos.nw_tos;
- break;
+static int
+handle_set_config(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_switch_config *osc)
+{
+ uint16_t flags;
+ int error;
- case OFPAT_SET_TP_SRC:
- oa = odp_actions_add(ctx->out, ODPAT_SET_TP_SRC);
- ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port;
- break;
+ error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc);
+ if (error) {
+ return error;
+ }
+ flags = ntohs(osc->flags);
- if (ofconn == p->controller) {
- case OFPAT_SET_TP_DST:
- oa = odp_actions_add(ctx->out, ODPAT_SET_TP_DST);
- ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port;
++ if (ofconn->type == OFCONN_CONTROLLER && ofconn->role != NX_ROLE_SLAVE) {
+ switch (flags & OFPC_FRAG_MASK) {
+ case OFPC_FRAG_NORMAL:
+ wdp_set_drop_frags(p->wdp, false);
break;
-
- case OFPAT_VENDOR:
- xlate_nicira_action(ctx, (const struct nx_action_header *) ia);
+ case OFPC_FRAG_DROP:
+ wdp_set_drop_frags(p->wdp, true);
break;
-
default:
- VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type);
+ VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
+ osc->flags);
break;
}
}
-}
- if ((ntohs(osc->miss_send_len) != 0) != (ofconn->miss_send_len != 0)) {
- if (ntohs(osc->miss_send_len) != 0) {
- ofconn->pktbuf = pktbuf_create();
- } else {
- pktbuf_destroy(ofconn->pktbuf);
- }
-static int
-xlate_actions(const union ofp_action *in, size_t n_in,
- const flow_t *flow, struct ofproto *ofproto,
- const struct ofpbuf *packet,
- struct odp_actions *out, tag_type *tags, bool *may_set_up_flow,
- uint16_t *nf_output_iface)
-{
- tag_type no_tags = 0;
- struct action_xlate_ctx ctx;
- COVERAGE_INC(ofproto_ofp2odp);
- odp_actions_init(out);
- ctx.flow = *flow;
- ctx.recurse = 0;
- ctx.ofproto = ofproto;
- ctx.packet = packet;
- ctx.out = out;
- ctx.tags = tags ? tags : &no_tags;
- ctx.may_set_up_flow = true;
- ctx.nf_output_iface = NF_OUT_DROP;
- do_xlate_actions(in, n_in, &ctx);
-
- /* Check with in-band control to see if we're allowed to set up this
- * flow. */
- if (!in_band_rule_check(ofproto->in_band, flow, out)) {
- ctx.may_set_up_flow = false;
-- }
--
- if (may_set_up_flow) {
- *may_set_up_flow = ctx.may_set_up_flow;
- }
- if (nf_output_iface) {
- *nf_output_iface = ctx.nf_output_iface;
- }
- if (odp_actions_overflow(out)) {
- odp_actions_init(out);
- return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY);
- }
+ ofconn->miss_send_len = ntohs(osc->miss_send_len);
+
return 0;
}
buffer = NULL;
}
- flow_extract(&payload, ntohs(opo->in_port), &flow);
- flow_extract(&payload, 0, ofp_port_to_odp_port(ntohs(opo->in_port)), &flow);
- error = xlate_actions((const union ofp_action *) opo->actions, n_actions,
- &flow, p, &payload, &actions, NULL, NULL, NULL);
- if (error) {
- return error;
- }
-
- dpif_execute(p->dpif, flow.in_port, actions.actions, actions.n_actions,
- &payload);
++ flow_extract(&payload, 0, ntohs(opo->in_port), &flow);
+ wdp_execute(p->wdp, flow.in_port, (const union ofp_action *) actions,
+ n_actions, &payload);
ofpbuf_delete(buffer);
return 0;
}
-static void
-update_port_config(struct ofproto *p, struct ofport *port,
- uint32_t config, uint32_t mask)
-{
- mask &= config ^ port->opp.config;
- if (mask & OFPPC_PORT_DOWN) {
- if (config & OFPPC_PORT_DOWN) {
- netdev_turn_flags_off(port->netdev, NETDEV_UP, true);
- } else {
- netdev_turn_flags_on(port->netdev, NETDEV_UP, true);
- }
- }
-#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD)
- if (mask & REVALIDATE_BITS) {
- COVERAGE_INC(ofproto_costly_flags);
- port->opp.config ^= mask & REVALIDATE_BITS;
- p->need_revalidate = true;
- }
-#undef REVALIDATE_BITS
- if (mask & OFPPC_NO_FLOOD) {
- port->opp.config ^= OFPPC_NO_FLOOD;
- refresh_port_groups(p);
- }
- if (mask & OFPPC_NO_PACKET_IN) {
- port->opp.config ^= OFPPC_NO_PACKET_IN;
- }
-}
-
static int
- handle_port_mod(struct ofproto *p, struct ofp_header *oh)
+ handle_port_mod(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_header *oh)
{
const struct ofp_port_mod *opm;
- struct ofport *port;
+ struct wdp_port port;
int error;
+ error = reject_slave_controller(ofconn, oh);
+ if (error) {
+ return error;
+ }
error = check_ofp_message(oh, OFPT_PORT_MOD, sizeof *opm);
if (error) {
return error;
memset(ots, 0, sizeof *ots);
ots->table_id = TABLEID_CLASSIFIER;
strcpy(ots->name, "classifier");
- ots->wildcards = htonl(OFPFW_ALL);
+ ots->wildcards = p->tun_id_from_cookie ? htonl(OVSFW_ALL)
+ : htonl(OFPFW_ALL);
- ots->max_entries = htonl(65536);
- ots->active_count = htonl(n_wild);
- ots->lookup_count = htonll(0); /* XXX */
- ots->matched_count = htonll(0); /* XXX */
+ ots->max_entries = htonl(dpstats.wild.max_capacity);
+ ots->active_count = htonl(dpstats.wild.n_flows);
+ ots->lookup_count = htonll(dpstats.wild.n_hit + dpstats.wild.n_missed);
+ ots->matched_count = htonll(dpstats.wild.n_hit);
queue_tx(msg, ofconn, ofconn->reply_counter);
return 0;
}
static void
-append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn,
+append_port_stat(struct wdp_port *port, struct ofconn *ofconn,
- struct ofpbuf *msg)
+ struct ofpbuf **msgp)
{
struct netdev_stats stats;
struct ofp_port_stats *ops;
* netdev_get_stats() will log errors. */
netdev_get_stats(port->netdev, &stats);
- ops = append_stats_reply(sizeof *ops, ofconn, &msg);
+ ops = append_stats_reply(sizeof *ops, ofconn, msgp);
- ops->port_no = htons(odp_port_to_ofp_port(port_no));
+ ops->port_no = htons(port->opp.port_no);
memset(ops->pad, 0, sizeof ops->pad);
ops->rx_packets = htonll(stats.rx_packets);
ops->tx_packets = htonll(stats.tx_packets);
msg = start_stats_reply(osr, sizeof *ops * 16);
if (psr->port_no != htons(OFPP_NONE)) {
- port = port_array_get(&p->ports,
- ofp_port_to_odp_port(ntohs(psr->port_no)));
- if (port) {
- append_port_stat(port, ntohs(psr->port_no), ofconn, &msg);
+ struct wdp_port port;
+
+ if (!wdp_port_query_by_number(p->wdp, ntohs(psr->port_no), &port)) {
- append_port_stat(&port, ofconn, msg);
++ append_port_stat(&port, ofconn, &msg);
+ wdp_port_free(&port);
}
} else {
- PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
- append_port_stat(port, port_no, ofconn, &msg);
+ struct wdp_port *ports;
+ size_t n_ports;
+ size_t i;
+
+ wdp_port_list(p->wdp, &ports, &n_ports);
+ for (i = 0; i < n_ports; i++) {
- append_port_stat(&ports[i], ofconn, msg);
++ append_port_stat(&ports[i], ofconn, &msg);
}
+ wdp_port_array_free(ports, n_ports);
}
queue_tx(msg, ofconn, ofconn->reply_counter);
ofs = append_stats_reply(len, cbdata->ofconn, &cbdata->msg);
ofs->length = htons(len);
- ofs->table_id = rule->cr.wc.wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH;
+ ofs->table_id = rule->cr.flow.wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH;
ofs->pad = 0;
- flow_to_match(&rule->cr.flow, &ofs->match);
- flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards,
- cbdata->ofproto->tun_id_from_cookie, &ofs->match);
++ flow_to_match(&rule->cr.flow, cbdata->ofproto->tun_id_from_cookie,
++ &ofs->match);
ofs->duration_sec = htonl(sec);
ofs->duration_nsec = htonl(msec * 1000000);
- ofs->cookie = rule->flow_cookie;
- ofs->priority = htons(rule->cr.priority);
+ ofs->cookie = ofproto_rule_cast(rule)->flow_cookie;
+ ofs->priority = htons(rule->cr.flow.priority);
ofs->idle_timeout = htons(rule->idle_timeout);
ofs->hard_timeout = htons(rule->hard_timeout);
memset(ofs->pad2, 0, sizeof ofs->pad2);
cbdata.ofconn = ofconn;
cbdata.out_port = fsr->out_port;
cbdata.msg = start_stats_reply(osr, 1024);
- flow_from_match(&target, 0, &fsr->match);
- cls_rule_from_match(&fsr->match, 0, false, 0, &target);
- classifier_for_each_match(&p->cls, &target,
- table_id_to_include(fsr->table_id),
- flow_stats_cb, &cbdata);
++ flow_from_match(&fsr->match, 0, false, 0, &target);
+ wdp_flow_for_each_match(p->wdp, &target,
+ table_id_to_include(fsr->table_id),
+ flow_stats_cb, &cbdata);
queue_tx(cbdata.msg, ofconn, ofconn->reply_counter);
return 0;
}
uint64_t packet_count, byte_count;
size_t act_len = sizeof *rule->actions * rule->n_actions;
- /* Don't report on subrules. */
- if (rule->super != NULL) {
- return;
- }
-
query_stats(cbdata->ofproto, rule, &packet_count, &byte_count);
- flow_to_match(&rule->cr.flow, &match);
- flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards,
- cbdata->ofproto->tun_id_from_cookie, &match);
++ flow_to_match(&rule->cr.flow, cbdata->ofproto->tun_id_from_cookie,
++ &match);
ds_put_format(results, "duration=%llds, ",
(time_msec() - rule->created) / 1000);
void
ofproto_get_all_flows(struct ofproto *p, struct ds *results)
{
- struct ofp_match match;
- struct cls_rule target;
struct flow_stats_ds_cbdata cbdata;
+ struct ofp_match match;
+ flow_t target;
memset(&match, 0, sizeof match);
- match.wildcards = htonl(OFPFW_ALL);
+ match.wildcards = htonl(OVSFW_ALL);
cbdata.ofproto = p;
cbdata.results = results;
- flow_from_match(&target, 0, &match);
- cls_rule_from_match(&match, 0, false, 0, &target);
- classifier_for_each_match(&p->cls, &target, CLS_INC_ALL,
- flow_stats_ds_cb, &cbdata);
++ flow_from_match(&match, 0, false, 0, &target);
+ wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL,
+ flow_stats_ds_cb, &cbdata);
}
struct aggregate_stats_cbdata {
cbdata.packet_count = 0;
cbdata.byte_count = 0;
cbdata.n_flows = 0;
- flow_from_match(&target, 0, &asr->match);
- cls_rule_from_match(&asr->match, 0, false, 0, &target);
- classifier_for_each_match(&p->cls, &target,
- table_id_to_include(asr->table_id),
- aggregate_stats_cb, &cbdata);
++ flow_from_match(&asr->match, 0, false, 0, &target);
+ wdp_flow_for_each_match(p->wdp, &target,
+ table_id_to_include(asr->table_id),
+ aggregate_stats_cb, &cbdata);
msg = start_stats_reply(osr, sizeof *reply);
reply = append_stats_reply(sizeof *reply, ofconn, &msg);
}
}
-static long long int
-msec_from_nsec(uint64_t sec, uint32_t nsec)
-{
- return !sec ? 0 : sec * 1000 + nsec / 1000000;
-}
-
-static void
-update_time(struct ofproto *ofproto, struct rule *rule,
- const struct odp_flow_stats *stats)
-{
- long long int used = msec_from_nsec(stats->used_sec, stats->used_nsec);
- if (used > rule->used) {
- rule->used = used;
- if (rule->super && used > rule->super->used) {
- rule->super->used = used;
- }
- netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used);
- }
-}
-
-static void
-update_stats(struct ofproto *ofproto, struct rule *rule,
- const struct odp_flow_stats *stats)
-{
- if (stats->n_packets) {
- update_time(ofproto, rule, stats);
- rule->packet_count += stats->n_packets;
- rule->byte_count += stats->n_bytes;
- netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos,
- stats->tcp_flags);
- }
-}
-
+ /* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT
+ * in which no matching flow already exists in the flow table.
+ *
+ * Adds the flow specified by 'ofm', which is followed by 'n_actions'
+ * ofp_actions, to 'p''s flow table. Returns 0 on success or an OpenFlow error
+ * code as encoded by ofp_mkerr() on failure.
+ *
+ * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id,
+ * if any. */
static int
add_flow(struct ofproto *p, struct ofconn *ofconn,
- struct ofp_flow_mod *ofm, size_t n_actions)
+ const struct ofp_flow_mod *ofm, size_t n_actions)
{
+ struct wdp_rule *rule;
+ struct wdp_flow_put put;
struct ofpbuf *packet;
- struct rule *rule;
uint16_t in_port;
+ flow_t flow;
int error;
- flow_from_match(&flow, ntohs(ofm->priority), &ofm->match);
- if (ofm->flags & htons(OFPFF_CHECK_OVERLAP)) {
- flow_t flow;
- uint32_t wildcards;
-
- flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie,
- &flow, &wildcards);
- if (classifier_rule_overlaps(&p->cls, &flow, wildcards,
- ntohs(ofm->priority))) {
- return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_OVERLAP);
- }
++ flow_from_match(&ofm->match, ntohs(ofm->priority), p->tun_id_from_cookie,
++ ofm->cookie, &flow);
+ if (ofm->flags & htons(OFPFF_CHECK_OVERLAP)
+ && wdp_flow_overlaps(p->wdp, &flow)) {
+ return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_OVERLAP);
}
- rule = rule_create(p, NULL, (const union ofp_action *) ofm->actions,
- n_actions, ntohs(ofm->idle_timeout),
- ntohs(ofm->hard_timeout), ofm->cookie,
- ofm->flags & htons(OFPFF_SEND_FLOW_REM));
- cls_rule_from_match(&ofm->match, ntohs(ofm->priority),
- p->tun_id_from_cookie, ofm->cookie, &rule->cr);
+ put.flags = WDP_PUT_CREATE | WDP_PUT_MODIFY | WDP_PUT_ALL;
+ put.flow = &flow;
+ put.actions = (const union ofp_action *) ofm->actions;
+ put.n_actions = n_actions;
+ put.idle_timeout = ntohs(ofm->idle_timeout);
+ put.hard_timeout = ntohs(ofm->hard_timeout);
+ error = wdp_flow_put(p->wdp, &put, NULL, &rule);
+ if (error) {
+ /* XXX wdp_flow_put should return an OpenFlow error code. */
+ return error;
+ }
+ ofproto_rule_init(rule);
- error = 0;
if (ofm->buffer_id != htonl(UINT32_MAX)) {
error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id),
&packet, &in_port);
- } else {
- packet = NULL;
- in_port = UINT16_MAX;
+ if (!error) {
+ wdp_flow_inject(p->wdp, rule, in_port, packet);
+ ofpbuf_delete(packet);
+ }
}
- rule_insert(p, rule, packet, in_port);
- ofpbuf_delete(packet);
- return error;
+ return 0;
}
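ofproto_rule_init() and ofproto_rule_cast(), used here and below, are not defined anywhere in this hunk; they evidently attach per-rule OpenFlow bookkeeping to a wdp_rule. A sketch of the assumed shape follows -- 'flow_cookie' and 'send_flow_removed' do appear in this patch, but the struct layout and the 'client_data' hook are guesses:

struct ofproto_rule {
    uint64_t flow_cookie;       /* Copied verbatim from ofm->cookie, so it
                                 * stays in network byte order. */
    bool send_flow_removed;     /* Send OFPT_FLOW_REMOVED when deleted? */
};

static inline struct ofproto_rule *
ofproto_rule_cast(const struct wdp_rule *rule)
{
    return rule->client_data;   /* 'client_data' is a hypothetical hook. */
}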
-static struct rule *
++static struct wdp_rule *
+ find_flow_strict(struct ofproto *p, const struct ofp_flow_mod *ofm)
+ {
- uint32_t wildcards;
+ flow_t flow;
+
- flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie,
- &flow, &wildcards);
- return rule_from_cls_rule(classifier_find_rule_exactly(
- &p->cls, &flow, wildcards,
- ntohs(ofm->priority)));
++ flow_from_match(&ofm->match, ntohs(ofm->priority),
++ p->tun_id_from_cookie, ofm->cookie, &flow);
++ return wdp_flow_get(p->wdp, &flow);
+ }
+
static int
- modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm,
- size_t n_actions, uint16_t command, struct wdp_rule *rule)
+ send_buffered_packet(struct ofproto *ofproto, struct ofconn *ofconn,
- struct rule *rule, const struct ofp_flow_mod *ofm)
++ struct wdp_rule *rule, const struct ofp_flow_mod *ofm)
{
- if (rule_is_hidden(rule)) {
+ struct ofpbuf *packet;
+ uint16_t in_port;
- flow_t flow;
+ int error;
+
+ if (ofm->buffer_id == htonl(UINT32_MAX)) {
return 0;
}
- if (command == OFPFC_DELETE) {
- delete_flow(p, rule, OFPPR_DELETE);
- } else {
- const struct ofp_action_header *actions = ofm->actions;
- struct wdp_flow_put put;
-
- ofproto_rule_cast(rule)->flow_cookie = ofm->cookie;
-
- put.flags = WDP_PUT_MODIFY | WDP_PUT_ACTIONS;
- put.flow = &rule->cr.flow;
- put.actions = (const union ofp_action *) actions;
- put.n_actions = n_actions;
- put.idle_timeout = put.hard_timeout = 0;
- wdp_flow_put(p->wdp, &put, NULL, NULL);
+ error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id),
+ &packet, &in_port);
+ if (error) {
+ return error;
}
- flow_extract(packet, 0, in_port, &flow);
- rule_execute(ofproto, rule, packet, &flow);
++ wdp_flow_inject(ofproto->wdp, rule, in_port, packet);
+ ofpbuf_delete(packet);
+
return 0;
}
- struct rule *match;
+ \f
+ /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */
+
+ struct modify_flows_cbdata {
+ struct ofproto *ofproto;
+ const struct ofp_flow_mod *ofm;
+ size_t n_actions;
- size_t n_actions, struct rule *);
-static void modify_flows_cb(struct cls_rule *, void *cbdata_);
++ struct wdp_rule *match;
+ };
+
+ static int modify_flow(struct ofproto *, const struct ofp_flow_mod *,
++ size_t n_actions, struct wdp_rule *);
++static void modify_flows_cb(struct wdp_rule *, void *cbdata_);
+ /* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code as
+ * encoded by ofp_mkerr() on failure.
+ *
+ * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id,
+ * if any. */
static int
- modify_flows_strict(struct ofproto *p, const struct ofp_flow_mod *ofm,
- size_t n_actions, uint16_t command)
+ modify_flows_loose(struct ofproto *p, struct ofconn *ofconn,
+ const struct ofp_flow_mod *ofm, size_t n_actions)
{
- struct wdp_rule *rule;
- flow_t flow;
+ struct modify_flows_cbdata cbdata;
- struct cls_rule target;
++ flow_t target;
- flow_from_match(&flow, ntohs(ofm->priority), &ofm->match);
- rule = wdp_flow_get(p->wdp, &flow);
+ cbdata.ofproto = p;
+ cbdata.ofm = ofm;
+ cbdata.n_actions = n_actions;
+ cbdata.match = NULL;
- if (rule) {
- if (command == OFPFC_DELETE
- && ofm->out_port != htons(OFPP_NONE)
- && !rule_has_out_port(rule, ofm->out_port)) {
- return 0;
- }
- cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie,
- &target);
++ flow_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie,
++ &target);
- modify_flow(p, ofm, n_actions, command, rule);
- classifier_for_each_match(&p->cls, &target, CLS_INC_ALL,
- modify_flows_cb, &cbdata);
++ wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL,
++ modify_flows_cb, &cbdata);
+ if (cbdata.match) {
+ /* This credits the packet to whichever flow happened to
+ * match last. That's weird. Maybe we should do a lookup for the
+ * flow that actually matches the packet? Who knows. */
+ send_buffered_packet(p, ofconn, cbdata.match, ofm);
+ return 0;
+ } else {
+ return add_flow(p, ofconn, ofm, n_actions);
}
- return 0;
}
- struct modify_flows_cbdata {
- struct ofproto *ofproto;
- const struct ofp_flow_mod *ofm;
- uint16_t out_port;
- size_t n_actions;
- uint16_t command;
- };
+ /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error
+ * code as encoded by ofp_mkerr() on failure.
+ *
+ * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id,
+ * if any. */
+ static int
+ modify_flow_strict(struct ofproto *p, struct ofconn *ofconn,
+ struct ofp_flow_mod *ofm, size_t n_actions)
+ {
- struct rule *rule = find_flow_strict(p, ofm);
++ struct wdp_rule *rule = find_flow_strict(p, ofm);
+ if (rule && !rule_is_hidden(rule)) {
+ modify_flow(p, ofm, n_actions, rule);
+ return send_buffered_packet(p, ofconn, rule, ofm);
+ } else {
+ return add_flow(p, ofconn, ofm, n_actions);
+ }
+ }
+ /* Callback for modify_flows_loose(). */
static void
-modify_flows_cb(struct cls_rule *rule_, void *cbdata_)
+modify_flows_cb(struct wdp_rule *rule, void *cbdata_)
{
- struct rule *rule = rule_from_cls_rule(rule_);
struct modify_flows_cbdata *cbdata = cbdata_;
- if (cbdata->out_port != htons(OFPP_NONE)
- && !rule_has_out_port(rule, cbdata->out_port)) {
- return;
+ if (!rule_is_hidden(rule)) {
+ cbdata->match = rule;
+ modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions, rule);
}
-
- modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions,
- cbdata->command, rule);
}
+ /* Implements core of OFPFC_MODIFY and OFPFC_MODIFY_STRICT where 'rule' has
+ * been identified as a flow in 'p''s flow table to be modified, by changing
+ * the rule's actions to match those in 'ofm' (which is followed by 'n_actions'
+ * ofp_action[] structures). */
static int
- modify_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm,
- size_t n_actions, uint16_t command)
+ modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm,
- size_t n_actions, struct rule *rule)
++ size_t n_actions, struct wdp_rule *rule)
{
- struct modify_flows_cbdata cbdata;
- size_t actions_len = n_actions * sizeof *rule->actions;
++ const struct ofp_action_header *actions = ofm->actions;
++ struct ofproto_rule *ofproto_rule = ofproto_rule_cast(rule);
++ struct wdp_flow_put put;
+
- rule->flow_cookie = ofm->cookie;
++ ofproto_rule->flow_cookie = ofm->cookie;
+
+ /* If the actions are the same, do nothing. */
+ if (n_actions == rule->n_actions
- && !memcmp(ofm->actions, rule->actions, actions_len))
++ && !memcmp(ofm->actions, rule->actions, sizeof *actions * n_actions))
+ {
+ return 0;
+ }
+
- /* Replace actions. */
- free(rule->actions);
- rule->actions = xmemdup(ofm->actions, actions_len);
- rule->n_actions = n_actions;
-
- /* Make sure that the datapath gets updated properly. */
- if (rule->cr.wc.wildcards) {
- COVERAGE_INC(ofproto_mod_wc_flow);
- p->need_revalidate = true;
- } else {
- rule_update_actions(p, rule);
- }
-
- return 0;
++ put.flags = WDP_PUT_MODIFY | WDP_PUT_ACTIONS;
++ put.flow = &rule->cr.flow;
++ put.actions = (const union ofp_action *) actions;
++ put.n_actions = n_actions;
++ put.idle_timeout = put.hard_timeout = 0;
++ return wdp_flow_put(p->wdp, &put, NULL, NULL);
+ }
+ \f
+ /* OFPFC_DELETE implementation. */
+
+ struct delete_flows_cbdata {
+ struct ofproto *ofproto;
+ uint16_t out_port;
+ };
+
-static void delete_flows_cb(struct cls_rule *, void *cbdata_);
-static void delete_flow(struct ofproto *, struct rule *, uint16_t out_port);
++static void delete_flows_cb(struct wdp_rule *, void *cbdata_);
++static void delete_flow_core(struct ofproto *, struct wdp_rule *,
++ uint16_t out_port);
+
+ /* Implements OFPFC_DELETE. */
+ static void
+ delete_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm)
+ {
+ struct delete_flows_cbdata cbdata;
- struct cls_rule target;
+ flow_t target;
cbdata.ofproto = p;
- cbdata.ofm = ofm;
- cbdata.out_port = (command == OFPFC_DELETE ? ofm->out_port
- : htons(OFPP_NONE));
- cbdata.n_actions = n_actions;
- cbdata.command = command;
+ cbdata.out_port = ofm->out_port;
+
- cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie,
- &target);
++ flow_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie,
++ &target);
- flow_from_match(&target, 0, &ofm->match);
- classifier_for_each_match(&p->cls, &target, CLS_INC_ALL,
- delete_flows_cb, &cbdata);
+ wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL,
- modify_flows_cb, &cbdata);
- return 0;
++ delete_flows_cb, &cbdata);
}
- struct rule *rule = find_flow_strict(p, ofm);
+ /* Implements OFPFC_DELETE_STRICT. */
+ static void
+ delete_flow_strict(struct ofproto *p, struct ofp_flow_mod *ofm)
+ {
- delete_flow(p, rule, ofm->out_port);
++ struct wdp_rule *rule = find_flow_strict(p, ofm);
+ if (rule) {
-delete_flows_cb(struct cls_rule *rule_, void *cbdata_)
++ delete_flow_core(p, rule, ofm->out_port);
+ }
+ }
+
+ /* Callback for delete_flows_loose(). */
+ static void
- struct rule *rule = rule_from_cls_rule(rule_);
++delete_flows_cb(struct wdp_rule *rule, void *cbdata_)
+ {
- delete_flow(cbdata->ofproto, rule, cbdata->out_port);
+ struct delete_flows_cbdata *cbdata = cbdata_;
+
-delete_flow(struct ofproto *p, struct rule *rule, uint16_t out_port)
++ delete_flow_core(cbdata->ofproto, rule, cbdata->out_port);
+ }
+
+ /* Implements core of OFPFC_DELETE and OFPFC_DELETE_STRICT where 'rule' has
+ * been identified as a flow to delete from 'p''s flow table, by deleting the
+ * flow and sending out an OFPT_FLOW_REMOVED message to any interested
+ * controller.
+ *
+ * Will not delete 'rule' if it is hidden. Will delete 'rule' only if
+ * 'out_port' is htons(OFPP_NONE) or if 'rule' actually outputs to the
+ * specified 'out_port'. */
+ static void
- send_flow_removed(p, rule, time_msec(), OFPRR_DELETE);
- rule_remove(p, rule);
++delete_flow_core(struct ofproto *p, struct wdp_rule *rule, uint16_t out_port)
+ {
+ if (rule_is_hidden(rule)) {
+ return;
+ }
+
+ if (out_port != htons(OFPP_NONE) && !rule_has_out_port(rule, out_port)) {
+ return;
+ }
+
++ delete_flow(p, rule, OFPRR_DELETE);
+ }
+ \f
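A concrete example may help distinguish the loose and strict paths dispatched in handle_flow_mod() below (illustrative flows, not taken from the patch):

/* Worked example: suppose the table holds two flows,
 *     (1) priority 32768, match in_port=1,            actions=output:2
 *     (2) priority 32768, match in_port=1, dl_vlan=9, actions=output:3
 * An OFPFC_DELETE whose match is in_port=1 removes both flows, because the
 * loose path treats every flow whose match is at least as specific as the
 * request as a hit.  An OFPFC_DELETE_STRICT with the same match and priority
 * removes only (1), because find_flow_strict() requires the wildcards and
 * priority to match exactly. */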
static int
handle_flow_mod(struct ofproto *p, struct ofconn *ofconn,
struct ofp_flow_mod *ofm)
switch (ntohs(ofm->command)) {
case OFPFC_ADD:
return add_flow(p, ofconn, ofm, n_actions);
case OFPFC_MODIFY:
- return modify_flows_loose(p, ofm, n_actions, OFPFC_MODIFY);
+ return modify_flows_loose(p, ofconn, ofm, n_actions);
case OFPFC_MODIFY_STRICT:
- return modify_flows_strict(p, ofm, n_actions, OFPFC_MODIFY);
+ return modify_flow_strict(p, ofconn, ofm, n_actions);
case OFPFC_DELETE:
- return modify_flows_loose(p, ofm, n_actions, OFPFC_DELETE);
+ delete_flows_loose(p, ofm);
+ return 0;
case OFPFC_DELETE_STRICT:
- return modify_flows_strict(p, ofm, n_actions, OFPFC_DELETE);
+ delete_flow_strict(p, ofm);
+ return 0;
default:
return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_BAD_COMMAND);
}
\f
static void
-handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet)
+handle_flow_miss(struct ofproto *p, struct wdp_packet *packet)
{
- struct odp_msg *msg = packet->data;
- struct rule *rule;
- struct ofpbuf payload;
+ struct wdp_rule *rule;
flow_t flow;
- flow_extract(packet->payload, packet->in_port, &flow);
- payload.data = msg + 1;
- payload.size = msg->length - sizeof *msg;
- flow_extract(&payload, msg->arg, msg->port, &flow);
-
- /* Check with in-band control to see if this packet should be sent
- * to the local port regardless of the flow table. */
- if (in_band_msg_in_hook(p->in_band, &flow, &payload)) {
- union odp_action action;
-
- memset(&action, 0, sizeof(action));
- action.output.type = ODPAT_OUTPUT;
- action.output.port = ODPP_LOCAL;
- dpif_execute(p->dpif, flow.in_port, &action, 1, &payload);
- }
-
- rule = lookup_valid_rule(p, &flow);
++ flow_extract(packet->payload, packet->tun_id, packet->in_port, &flow);
+ rule = wdp_flow_match(p->wdp, &flow);
if (!rule) {
/* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */
- struct ofport *port = port_array_get(&p->ports, msg->port);
- if (port) {
- if (port->opp.config & OFPPC_NO_PACKET_IN) {
+ struct wdp_port port;
+
+ if (!wdp_port_query_by_number(p->wdp, packet->in_port, &port)) {
+ bool no_packet_in = (port.opp.config & OFPPC_NO_PACKET_IN) != 0;
+ wdp_port_free(&port);
+ if (no_packet_in) {
COVERAGE_INC(ofproto_no_packet_in);
- /* XXX install 'drop' flow entry */
- ofpbuf_delete(packet);
+ wdp_packet_destroy(packet);
return;
}
} else {
return;
}
- if (rule->cr.wc.wildcards) {
- rule = rule_create_subrule(p, rule, &flow);
- rule_make_actions(p, rule, packet);
- } else {
- if (!rule->may_install) {
- /* The rule is not installable, that is, we need to process every
- * packet, so process the current packet and set its actions into
- * 'subrule'. */
- rule_make_actions(p, rule, packet);
- } else {
- /* XXX revalidate rule if it needs it */
- }
- }
-
- rule_execute(p, rule, &payload, &flow);
- rule_reinstall(p, rule);
+ wdp_flow_inject(p->wdp, rule, packet->in_port, packet->payload);
- if (rule->cr.flow.priority == FAIL_OPEN_PRIORITY
- && rconn_is_connected(p->controller->rconn)) {
- if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY) {
++ if (rule->cr.flow.priority == FAIL_OPEN_PRIORITY) {
/*
* Extra-special case for fail-open mode.
*
*
* See the top-level comment in fail-open.c for more information.
*/
- pinsched_send(p->miss_sched, packet->in_port, packet,
- send_packet_in_miss, p);
+ send_packet_in(p, packet);
} else {
- ofpbuf_delete(packet);
+ wdp_packet_destroy(packet);
}
}
static void
-handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
+handle_wdp_packet(struct ofproto *p, struct wdp_packet *packet)
{
- struct odp_msg *msg = packet->data;
-
- switch (msg->type) {
- case _ODPL_ACTION_NR:
+ switch (packet->channel) {
+ case WDP_CHAN_ACTION:
COVERAGE_INC(ofproto_ctlr_action);
- pinsched_send(p->action_sched, packet->in_port, packet,
- send_packet_in_action, p);
+ send_packet_in(p, packet);
break;
- case _ODPL_SFLOW_NR:
- if (p->sflow) {
- ofproto_sflow_received(p->sflow, msg);
- }
- ofpbuf_delete(packet);
+ case WDP_CHAN_SFLOW:
+ /* XXX */
+ wdp_packet_destroy(packet);
break;
- case _ODPL_MISS_NR:
- handle_odp_miss_msg(p, packet);
+ case WDP_CHAN_MISS:
+ handle_flow_miss(p, packet);
break;
+ case WDP_N_CHANS:
default:
- VLOG_WARN_RL(&rl, "received ODP message of unexpected type %"PRIu32,
- msg->type);
+ wdp_packet_destroy(packet);
+ VLOG_WARN_RL(&rl, "received message on unexpected channel %d",
+ (int) packet->channel);
break;
}
}
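handle_wdp_packet() is presumably fed from ofproto_run1() by a receive loop over the wdp, along these lines. This is only a sketch: wdp_recv() and its signature are assumptions, since the patch shows just the wdp_packet 'channel'/'payload'/'in_port' fields:

    int i;

    for (i = 0; i < 50; i++) {
        struct wdp_packet *packet;
        int error;

        error = wdp_recv(p->wdp, &packet);
        if (error) {
            break;              /* Typically EAGAIN once the queue drains. */
        }
        handle_wdp_packet(p, packet);   /* Takes ownership of 'packet'. */
    }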
\f
-static void
-revalidate_cb(struct cls_rule *sub_, void *cbdata_)
-{
- struct rule *sub = rule_from_cls_rule(sub_);
- struct revalidate_cbdata *cbdata = cbdata_;
-
- if (cbdata->revalidate_all
- || (cbdata->revalidate_subrules && sub->super)
- || (tag_set_intersects(&cbdata->revalidate_set, sub->tags))) {
- revalidate_rule(cbdata->ofproto, sub);
- }
-}
-
-static bool
-revalidate_rule(struct ofproto *p, struct rule *rule)
-{
- const flow_t *flow = &rule->cr.flow;
-
- COVERAGE_INC(ofproto_revalidate_rule);
- if (rule->super) {
- struct rule *super;
- super = rule_from_cls_rule(classifier_lookup_wild(&p->cls, flow));
- if (!super) {
- rule_remove(p, rule);
- return false;
- } else if (super != rule->super) {
- COVERAGE_INC(ofproto_revalidate_moved);
- list_remove(&rule->list);
- list_push_back(&super->list, &rule->list);
- rule->super = super;
- rule->hard_timeout = super->hard_timeout;
- rule->idle_timeout = super->idle_timeout;
- rule->created = super->created;
- rule->used = 0;
- }
- }
-
- rule_update_actions(p, rule);
- return true;
-}
-
static struct ofpbuf *
- compose_flow_removed(const struct wdp_rule *rule, uint8_t reason)
-compose_flow_removed(struct ofproto *p, const struct rule *rule,
- long long int now, uint8_t reason)
++compose_flow_removed(struct ofproto *p, const struct wdp_rule *rule,
++ uint8_t reason)
{
- struct ofp_flow_removed *ofr;
- struct ofpbuf *buf;
- long long int tdiff = now - rule->created;
+ long long int tdiff = time_msec() - rule->created;
uint32_t sec = tdiff / 1000;
uint32_t msec = tdiff - (sec * 1000);
+ struct ofp_flow_removed *ofr;
+ struct ofpbuf *buf;
ofr = make_openflow(sizeof *ofr, OFPT_FLOW_REMOVED, &buf);
- flow_to_match(&rule->cr.flow, &ofr->match);
- flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, p->tun_id_from_cookie,
- &ofr->match);
- ofr->cookie = rule->flow_cookie;
- ofr->priority = htons(rule->cr.priority);
++ flow_to_match(&rule->cr.flow, p->tun_id_from_cookie, &ofr->match);
+ ofr->cookie = ofproto_rule_cast(rule)->flow_cookie;
+ ofr->priority = htons(rule->cr.flow.priority);
ofr->reason = reason;
ofr->duration_sec = htonl(sec);
ofr->duration_nsec = htonl(msec * 1000000);
* being added (and expiring). (It also prevents processing OpenFlow
* requests that would not add new flows, so it is imperfect.) */
- prev = NULL;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
- if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn)
- && ofconn_receives_async_msgs(ofconn)) {
- if (prev) {
- queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
- } else {
- buf = compose_flow_removed(p, rule, now, reason);
- }
- prev = ofconn;
- }
- }
- if (prev) {
- queue_tx(buf, prev, prev->reply_counter);
- }
-}
-
+ struct ofproto_rule *ofproto_rule = ofproto_rule_cast(rule);
+ struct wdp_flow_stats stats;
+ struct ofpbuf *buf;
-static void
-expire_rule(struct cls_rule *cls_rule, void *p_)
-{
- struct ofproto *p = p_;
- struct rule *rule = rule_from_cls_rule(cls_rule);
- long long int hard_expire, idle_expire, expire, now;
-
- hard_expire = (rule->hard_timeout
- ? rule->created + rule->hard_timeout * 1000
- : LLONG_MAX);
- idle_expire = (rule->idle_timeout
- && (rule->super || list_is_empty(&rule->list))
- ? rule->used + rule->idle_timeout * 1000
- : LLONG_MAX);
- expire = MIN(hard_expire, idle_expire);
-
- now = time_msec();
- if (now < expire) {
- if (rule->installed && now >= rule->used + 5000) {
- uninstall_idle_flow(p, rule);
- } else if (!rule->cr.wc.wildcards) {
- active_timeout(p, rule);
- }
+ if (ofproto_rule->send_flow_removed) {
+ /* Compose most of the ofp_flow_removed before 'rule' is destroyed. */
- buf = compose_flow_removed(rule, reason);
++ buf = compose_flow_removed(p, rule, reason);
+ } else {
+ buf = NULL;
+ }
+ if (wdp_flow_delete(p->wdp, rule, &stats)) {
return;
}
- COVERAGE_INC(ofproto_expired);
-
- /* Update stats. This code will be a no-op if the rule expired
- * due to an idle timeout. */
- if (rule->cr.wc.wildcards) {
- struct rule *subrule, *next;
- LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) {
- rule_remove(p, subrule);
- }
- } else {
- rule_uninstall(p, rule);
- }
+ if (buf) {
+ struct ofp_flow_removed *ofr;
+ struct ofconn *prev = NULL;
+ struct ofconn *ofconn;
- if (!rule_is_hidden(rule)) {
- send_flow_removed(p, rule, now,
- (now >= hard_expire
- ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT));
- }
- rule_remove(p, rule);
-}
+ /* Compose the parts of the ofp_flow_removed that require stats. */
+ ofr = buf->data;
+ ofr->packet_count = htonll(stats.n_packets);
+ ofr->byte_count = htonll(stats.n_bytes);
-static void
-active_timeout(struct ofproto *ofproto, struct rule *rule)
-{
- if (ofproto->netflow && !is_controller_rule(rule) &&
- netflow_active_timeout_expired(ofproto->netflow, &rule->nf_flow)) {
- struct ofexpired expired;
- struct odp_flow odp_flow;
-
- /* Get updated flow stats. */
- memset(&odp_flow, 0, sizeof odp_flow);
- if (rule->installed) {
- odp_flow.key = rule->cr.flow;
- odp_flow.flags = ODPFF_ZERO_TCP_FLAGS;
- dpif_flow_get(ofproto->dpif, &odp_flow);
-
- if (odp_flow.stats.n_packets) {
- update_time(ofproto, rule, &odp_flow.stats);
- netflow_flow_update_flags(&rule->nf_flow, odp_flow.stats.ip_tos,
- odp_flow.stats.tcp_flags);
+ LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+ if (rconn_is_connected(ofconn->rconn)) {
+ if (prev) {
+ queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
+ }
+ prev = ofconn;
}
}
-
- expired.flow = rule->cr.flow;
- expired.packet_count = rule->packet_count +
- odp_flow.stats.n_packets;
- expired.byte_count = rule->byte_count + odp_flow.stats.n_bytes;
- expired.used = rule->used;
-
- netflow_expire(ofproto->netflow, &rule->nf_flow, &expired);
-
- /* Schedule us to send the accumulated records once we have
- * collected all of them. */
- poll_immediate_wake();
- }
-}
-
-static void
-update_used(struct ofproto *p)
-{
- struct odp_flow *flows;
- size_t n_flows;
- size_t i;
- int error;
-
- error = dpif_flow_list_all(p->dpif, &flows, &n_flows);
- if (error) {
- return;
- }
-
- for (i = 0; i < n_flows; i++) {
- struct odp_flow *f = &flows[i];
- struct rule *rule;
-
- rule = rule_from_cls_rule(
- classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX));
- if (!rule || !rule->installed) {
- COVERAGE_INC(ofproto_unexpected_rule);
- dpif_flow_del(p->dpif, f);
- continue;
+ if (prev) {
+ queue_tx(buf, prev, prev->reply_counter);
+ } else {
+ ofpbuf_delete(buf);
}
-
- update_time(p, rule, &f->stats);
- rule_account(p, rule, f->stats.n_bytes);
}
- free(flows);
+ free(ofproto_rule);
}
+ /* pinsched callback for sending 'packet' on 'ofconn'. */
static void
- do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id,
- const struct wdp_packet *packet, int send_len)
-do_send_packet_in(struct ofpbuf *packet, void *ofconn_)
++do_send_packet_in(struct wdp_packet *packet, void *ofconn_)
{
- struct ofpbuf *opi;
- uint8_t reason;
+ struct ofconn *ofconn = ofconn_;
- reason = packet->channel == WDP_CHAN_ACTION ? OFPR_ACTION : OFPR_NO_MATCH;
- opi = make_packet_in(buffer_id, packet->in_port, reason,
- packet->payload, send_len);
- rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100);
- rconn_send_with_limit(ofconn->rconn, packet,
++ rconn_send_with_limit(ofconn->rconn, packet->payload,
+ ofconn->packet_in_counter, 100);
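++ /* The rconn now owns the payload buffer; detach it so that destroying
++ * the wdp_packet wrapper below does not free what was just queued. */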
++ packet->payload = NULL;
++ wdp_packet_destroy(packet);
}
+ /* Takes 'packet', which has been converted with do_convert_to_packet_in(),
+ * finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s
+ * packet scheduler for sending.
+ *
+ * 'max_len' specifies the maximum number of bytes of the packet to send on
+ * 'ofconn' (INT_MAX specifies no limit).
+ *
+ * If 'clone' is true, the caller retains ownership of 'packet'. Otherwise,
+ * ownership is transferred to this function. */
static void
- send_packet_in_action(struct wdp_packet *packet, void *p_)
- {
- struct ofproto *p = p_;
- struct ofconn *ofconn;
-schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, int max_len,
- bool clone)
++schedule_packet_in(struct ofconn *ofconn, struct wdp_packet *packet,
++ int max_len, bool clone)
+ {
+ struct ofproto *ofproto = ofconn->ofproto;
- struct ofp_packet_in *opi = packet->data;
- uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port));
++ struct ofp_packet_in *opi = packet->payload->data;
+ int send_len, trim_size;
+ uint32_t buffer_id;
+
+ /* Get buffer. */
+ if (opi->reason == OFPR_ACTION) {
+ buffer_id = UINT32_MAX;
+ } else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) {
+ buffer_id = pktbuf_get_null();
+ } else if (!ofconn->pktbuf) {
+ buffer_id = UINT32_MAX;
+ } else {
+ struct ofpbuf payload;
+ payload.data = opi->data;
- payload.size = packet->size - offsetof(struct ofp_packet_in, data);
- buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port);
++ payload.size = (packet->payload->size
++ - offsetof(struct ofp_packet_in, data));
++ buffer_id = pktbuf_save(ofconn->pktbuf, &payload, packet->in_port);
+ }
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
- if (ofconn == p->controller || ofconn->miss_send_len) {
- do_send_packet_in(ofconn, UINT32_MAX, packet, packet->send_len);
- }
+ /* Figure out how much of the packet to send. */
+ send_len = ntohs(opi->total_len);
+ if (buffer_id != UINT32_MAX) {
+ send_len = MIN(send_len, ofconn->miss_send_len);
}
- wdp_packet_destroy(packet);
+ send_len = MIN(send_len, max_len);
+
+ /* Adjust packet length and clone if necessary. */
+ trim_size = offsetof(struct ofp_packet_in, data) + send_len;
+ if (clone) {
- packet = ofpbuf_clone_data(packet->data, trim_size);
- opi = packet->data;
++ packet = wdp_packet_clone(packet, trim_size);
++ opi = packet->payload->data;
+ } else {
- packet->size = trim_size;
++ packet->payload->size = trim_size;
+ }
+
+ /* Update packet headers. */
+ opi->buffer_id = htonl(buffer_id);
- update_openflow_length(packet);
++ update_openflow_length(packet->payload);
+
+ /* Hand over to packet scheduler. It might immediately call into
+ * do_send_packet_in() or it might buffer it for a while (until a later
+ * call to pinsched_run()). */
- pinsched_send(ofconn->schedulers[opi->reason], in_port,
++ pinsched_send(ofconn->schedulers[opi->reason], packet->in_port,
+ packet, do_send_packet_in, ofconn);
}
-/* Replace struct odp_msg header in 'packet' by equivalent struct
- * ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as
- * returned by dpif_recv()).
++/* Converts 'packet->payload' to a struct ofp_packet_in. It must have
++ * sufficient headroom to do so (e.g. as returned by wdp_recv()).
+ *
+ * The conversion is not complete: the caller still needs to trim any unneeded
+ * payload off the end of the buffer, set the length in the OpenFlow header,
+ * and set buffer_id. Those require us to know the controller settings and so
+ * must be done on a per-controller basis.
+ *
+ * Returns the maximum number of bytes of the packet that should be sent to
+ * the controller (INT_MAX if no limit). */
+ static int
-do_convert_to_packet_in(struct ofpbuf *packet)
++do_convert_to_packet_in(struct wdp_packet *packet)
+ {
- struct odp_msg *msg = packet->data;
++ uint16_t total_len = packet->payload->size;
+ struct ofp_packet_in *opi;
- uint8_t reason;
- uint16_t total_len;
- uint16_t in_port;
- int max_len;
-
- /* Extract relevant header fields */
- if (msg->type == _ODPL_ACTION_NR) {
- reason = OFPR_ACTION;
- max_len = msg->arg;
- } else {
- reason = OFPR_NO_MATCH;
- max_len = INT_MAX;
- }
- total_len = msg->length - sizeof *msg;
- in_port = odp_port_to_ofp_port(msg->port);
+
+ /* Repurpose packet buffer by overwriting header. */
- ofpbuf_pull(packet, sizeof(struct odp_msg));
- opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data));
++ opi = ofpbuf_push_zeros(packet->payload,
++ offsetof(struct ofp_packet_in, data));
+ opi->header.version = OFP_VERSION;
+ opi->header.type = OFPT_PACKET_IN;
+ opi->total_len = htons(total_len);
- opi->in_port = htons(in_port);
- opi->reason = reason;
-
- return max_len;
++ opi->in_port = htons(packet->in_port);
++ if (packet->channel == WDP_CHAN_MISS) {
++ opi->reason = OFPR_NO_MATCH;
++ return INT_MAX;
++ } else {
++ opi->reason = OFPR_ACTION;
++ return packet->send_len;
++ }
+ }
+
-/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or
- * _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller
- * as necessary according to their individual configurations.
++/* Given 'packet' with channel WDP_CHAN_ACTION or WDP_CHAN_MISS, sends an
++ * OFPT_PACKET_IN message to each OpenFlow controller as necessary according to
++ * their individual configurations.
+ *
- * 'packet' must have sufficient headroom to convert it into a struct
++ * 'packet->payload' must have sufficient headroom to convert it into a struct
+ * ofp_packet_in (e.g. as returned by wdp_recv()).
+ *
+ * Takes ownership of 'packet'. */
static void
- send_packet_in_miss(struct wdp_packet *packet, void *p_)
-send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet)
++send_packet_in(struct ofproto *ofproto, struct wdp_packet *packet)
{
- struct ofproto *p = p_;
- bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open);
- struct ofconn *ofconn;
+ struct ofconn *ofconn, *prev;
+ int max_len;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
- if (ofconn->miss_send_len) {
- struct pktbuf *pb = ofconn->pktbuf;
- uint32_t buffer_id = (in_fail_open
- ? pktbuf_get_null()
- : pktbuf_save(pb, packet->payload,
- packet->in_port));
- int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len
- : UINT32_MAX);
- do_send_packet_in(ofconn, buffer_id, packet, send_len);
+ max_len = do_convert_to_packet_in(packet);
+
+ prev = NULL;
+ LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ if (ofconn_receives_async_msgs(ofconn)) {
+ if (prev) {
+ schedule_packet_in(prev, packet, max_len, true);
+ }
+ prev = ofconn;
}
}
- wdp_packet_destroy(packet);
+ if (prev) {
+ schedule_packet_in(prev, packet, max_len, false);
+ } else {
- ofpbuf_delete(packet);
++ wdp_packet_destroy(packet);
+ }
}
static uint64_t
struct ofpbuf;
struct switch_status;
+struct wdp_packet;
-typedef void pinsched_tx_cb(struct ofpbuf *, void *aux);
+typedef void pinsched_tx_cb(struct wdp_packet *, void *aux);
struct pinsched *pinsched_create(int rate_limit, int burst_limit,
struct switch_status *);
+ void pinsched_get_limits(const struct pinsched *,
+ int *rate_limit, int *burst_limit);
void pinsched_set_limits(struct pinsched *, int rate_limit, int burst_limit);
void pinsched_destroy(struct pinsched *);
-void pinsched_send(struct pinsched *, uint16_t port_no, struct ofpbuf *,
+void pinsched_send(struct pinsched *, uint16_t port_no, struct wdp_packet *,
pinsched_tx_cb *, void *aux);
void pinsched_run(struct pinsched *, pinsched_tx_cb *, void *aux);
void pinsched_wait(struct pinsched *);
--- /dev/null
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "wdp-xflow.h"
+
+#include <errno.h>
+#include <inttypes.h>
+
+#include "coverage.h"
+#include "dhcp.h"
+#include "netdev.h"
+#include "netflow.h"
+#include "ofpbuf.h"
+#include "openflow/nicira-ext.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "port-array.h"
+#include "shash.h"
+#include "stp.h"
+#include "svec.h"
+#include "timeval.h"
+#include "util.h"
+#include "vconn.h"
+#include "wdp-provider.h"
+#include "xfif.h"
+#include "xflow-util.h"
+#include "xtoxll.h"
+
+#define THIS_MODULE VLM_wdp_xflow
+#include "vlog.h"
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+\f
+/* Maximum numbers of rules. */
+#define WX_MAX_WILD 65536 /* Wildcarded rules. */
+#define WX_MAX_EXACT 1048576 /* Exact-match rules. */
+
+struct wx {
+ struct list list_node;
+ struct wdp wdp;
+ struct xfif *xfif;
+ struct classifier cls;
+ struct netdev_monitor *netdev_monitor;
+ struct port_array ports; /* Index is xflow port nr; wdp_port->opp.port_no
+ * is OFP port nr. */
+ struct shash port_by_name;
+ bool need_revalidate;
+ long long int next_expiration;
+};
+
+static struct list all_wx = LIST_INITIALIZER(&all_wx);
+
+static int wx_port_init(struct wx *);
+static void wx_port_run(struct wx *);
+static void wx_port_refresh_groups(struct wx *);
+
+enum {
+ WX_GROUP_FLOOD = 0,
+ WX_GROUP_ALL = 1
+};
+
+static struct wx *
+wx_cast(const struct wdp *wdp)
+{
+ return CONTAINER_OF(wdp, struct wx, wdp);
+}
+
+static int
+wx_xlate_actions(struct wx *, const union ofp_action *, size_t n,
+ const flow_t *flow, const struct ofpbuf *packet,
+ struct xflow_actions *out, bool *may_set_up_flow);
+\f
+struct wx_rule {
+ struct wdp_rule wr;
+
+ uint64_t packet_count; /* Number of packets received. */
+ uint64_t byte_count; /* Number of bytes received. */
+ uint64_t accounted_bytes; /* Number of bytes passed to account_cb. */
+ long long int used; /* Last-used time (0 if never used). */
+
+ /* If 'super' is non-NULL, this rule is a subrule, that is, it is an
+ * exact-match rule (having wr.cr.flow.wildcards of 0) generated from the
+ * wildcard rule 'super'. In this case, 'list' is an element of the
+ * super-rule's list.
+ *
+ * If 'super' is NULL, this rule is a super-rule, and 'list' is the head of
+ * a list of subrules. A super-rule with no wildcards (where
+ * wr.cr.flow.wildcards is 0) will never have any subrules. */
+ struct wx_rule *super;
+ struct list list;
+
+ /* Datapath actions.
+ *
+ * A super-rule with wildcard fields never has XFLOW actions (since the
+ * datapath only supports exact-match flows). */
+ bool installed; /* Installed in datapath? */
+ bool may_install; /* True ordinarily; false if actions must
+ * be reassessed for every packet. */
+ int n_xflow_actions;
+ union xflow_action *xflow_actions;
+};
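+
+/* Illustrative example of the relationship above: a wildcard rule matching,
+ * say, "tcp, nw_dst 10.0.0.0/8" acts as a super-rule; the first packet of
+ * each TCP connection that hits it creates an exact-match subrule whose
+ * 'super' points back at it and whose counters are folded into the
+ * super-rule on uninstall (see wx_rule_post_uninstall()). */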
+
+static void wx_rule_destroy(struct wx *, struct wx_rule *);
+static void wx_rule_update_actions(struct wx *, struct wx_rule *);
+static void wx_rule_execute(struct wx *, struct wx_rule *,
+ struct ofpbuf *packet, const flow_t *);
+static bool wx_rule_make_actions(struct wx *, struct wx_rule *,
+ const struct ofpbuf *packet);
+static void wx_rule_install(struct wx *, struct wx_rule *,
+ struct wx_rule *displaced_rule);
+
+static struct wx_rule *
+wx_rule_cast(const struct cls_rule *cls_rule)
+{
+ return cls_rule ? CONTAINER_OF(cls_rule, struct wx_rule, wr.cr) : NULL;
+}
+
+/* Returns true if 'rule' is merely an implementation detail that should be
+ * hidden from the client. */
+static inline bool
+wx_rule_is_hidden(const struct wx_rule *rule)
+{
+ return rule->super != NULL;
+}
+
+static void
+wx_rule_free(struct wx_rule *rule)
+{
+ wdp_rule_uninit(&rule->wr);
+ free(rule->xflow_actions);
+ free(rule);
+}
+
+static void
+wx_rule_account(struct wx *wx OVS_UNUSED, struct wx_rule *rule OVS_UNUSED,
+ uint64_t extra_bytes OVS_UNUSED)
+{
+ /* XXX call account_cb hook */
+}
+
+static void
+wx_rule_post_uninstall(struct wx *wx, struct wx_rule *rule)
+{
+ struct wx_rule *super = rule->super;
+
+ wx_rule_account(wx, rule, 0);
+
+ /* XXX netflow expiration */
+
+ if (super) {
+ super->packet_count += rule->packet_count;
+ super->byte_count += rule->byte_count;
+
+ /* Reset counters to prevent double counting if the rule ever gets
+ * reinstalled. */
+ rule->packet_count = 0;
+ rule->byte_count = 0;
+ rule->accounted_bytes = 0;
+
+ //XXX netflow_flow_clear(&rule->nf_flow);
+ }
+}
+
+static long long int
+xflow_flow_stats_to_msec(const struct xflow_flow_stats *stats)
+{
+ return (stats->used_sec
+ ? stats->used_sec * 1000 + stats->used_nsec / 1000000
+ : 0);
+}
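+
+/* For example, used_sec = 2 and used_nsec = 500000000 convert to
+ * 2 * 1000 + 500000000 / 1000000 = 2500 ms; a never-used flow reports
+ * used_sec = 0 and converts to 0. */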
+
+static void
+wx_rule_update_time(struct wx *wx OVS_UNUSED, struct wx_rule *rule,
+ const struct xflow_flow_stats *stats)
+{
+ long long int used = xflow_flow_stats_to_msec(stats);
+ if (used > rule->used) {
+ rule->used = used;
+ if (rule->super && used > rule->super->used) {
+ rule->super->used = used;
+ }
+ //XXX netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used);
+ }
+}
+
+static void
+wx_rule_update_stats(struct wx *wx, struct wx_rule *rule,
+ const struct xflow_flow_stats *stats)
+{
+ if (stats->n_packets) {
+ wx_rule_update_time(wx, rule, stats);
+ rule->packet_count += stats->n_packets;
+ rule->byte_count += stats->n_bytes;
+ /* XXX netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos,
+ stats->tcp_flags); */
+ }
+}
+
+static void
+wx_rule_uninstall(struct wx *wx, struct wx_rule *rule)
+{
+ assert(!rule->wr.cr.flow.wildcards);
+ if (rule->installed) {
+ struct xflow_flow xflow_flow;
+
+ xflow_key_from_flow(&xflow_flow.key, &rule->wr.cr.flow);
+ xflow_flow.actions = NULL;
+ xflow_flow.n_actions = 0;
+ xflow_flow.flags = 0;
+ if (!xfif_flow_del(wx->xfif, &xflow_flow)) {
+ wx_rule_update_stats(wx, rule, &xflow_flow.stats);
+ }
+ rule->installed = false;
+
+ wx_rule_post_uninstall(wx, rule);
+ }
+}
+
+#if 0
+static bool
+is_controller_rule(struct wx_rule *rule)
+{
+ /* If the only action is send to the controller then don't report
+ * NetFlow expiration messages since it is just part of the control
+ * logic for the network and not real traffic. */
+
+ if (rule && rule->super) {
+ struct wdp_rule *super = &rule->super->wr;
+
+ return super->n_actions == 1 &&
+ super->actions[0].type == htons(OFPAT_OUTPUT) &&
+ super->actions[0].output.port == htons(OFPP_CONTROLLER);
+ }
+
+ return false;
+}
+#endif
+
+static void
+wx_rule_remove(struct wx *wx, struct wx_rule *rule)
+{
+ if (rule->wr.cr.flow.wildcards) {
+ COVERAGE_INC(wx_del_wc_flow);
+ wx->need_revalidate = true;
+ } else {
+ wx_rule_uninstall(wx, rule);
+ }
+ classifier_remove(&wx->cls, &rule->wr.cr);
+ wx_rule_destroy(wx, rule);
+}
+
+static bool
+wx_rule_revalidate(struct wx *wx, struct wx_rule *rule)
+{
+ const flow_t *flow = &rule->wr.cr.flow;
+
+ COVERAGE_INC(wx_rule_revalidate);
+ if (rule->super) {
+ struct wx_rule *super;
+ super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow));
+ if (!super) {
+ wx_rule_remove(wx, rule);
+ return false;
+ } else if (super != rule->super) {
+ COVERAGE_INC(wx_revalidate_moved);
+ list_remove(&rule->list);
+ list_push_back(&super->list, &rule->list);
+ rule->super = super;
+ rule->wr.hard_timeout = super->wr.hard_timeout;
+ rule->wr.idle_timeout = super->wr.idle_timeout;
+ rule->wr.created = super->wr.created;
+ rule->used = 0;
+ }
+ }
+
+ wx_rule_update_actions(wx, rule);
+ return true;
+}
+
+/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its
+ * super-rule's list of subrules. If 'rule' is a super-rule, also iterates
+ * through all of its subrules and revalidates them, destroying any that no
+ * longer has a super-rule (which is probably all of them).
+ *
+ * Before calling this function, the caller must have removed 'rule' from
+ * the classifier. If 'rule' is an exact-match rule, the caller is also
+ * responsible for ensuring that it has been uninstalled from the datapath. */
+static void
+wx_rule_destroy(struct wx *wx, struct wx_rule *rule)
+{
+ if (!rule->super) {
+ struct wx_rule *subrule, *next;
+ LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) {
+ wx_rule_revalidate(wx, subrule);
+ }
+ } else {
+ list_remove(&rule->list);
+ }
+ wx_rule_free(rule);
+}
+
+#if 0
+static bool
+wx_rule_has_out_port(const struct wx_rule *rule, uint16_t out_port)
+{
+ const union ofp_action *oa;
+ struct actions_iterator i;
+
+ if (out_port == htons(OFPP_NONE)) {
+ return true;
+ }
+ for (oa = actions_first(&i, rule->wr.actions,
+ rule->wr.n_actions);
+ oa;
+ oa = actions_next(&i)) {
+ if (oa->type == htons(OFPAT_OUTPUT) && oa->output.port == out_port) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+/* Caller is responsible for initializing the 'cr' member of the returned
+ * rule. */
+static struct wx_rule *
+wx_rule_create(struct wx_rule *super,
+ const union ofp_action *actions, size_t n_actions,
+ uint16_t idle_timeout, uint16_t hard_timeout)
+{
+ struct wx_rule *rule = xzalloc(sizeof *rule);
+ wdp_rule_init(&rule->wr, actions, n_actions);
+ rule->wr.idle_timeout = idle_timeout;
+ rule->wr.hard_timeout = hard_timeout;
+ rule->used = rule->wr.created;
+ rule->super = super;
+ if (super) {
+ list_push_back(&super->list, &rule->list);
+ } else {
+ list_init(&rule->list);
+ }
+#if 0
+ netflow_flow_clear(&rule->nf_flow);
+ netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created);
+#endif
+
+ return rule;
+}
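+
+/* Minimal usage sketch (mirroring wx_flow_put() below): create the rule,
+ * initialize its classifier rule, then insert it:
+ *
+ * rule = wx_rule_create(NULL, actions, n_actions, idle_timeout,
+ * hard_timeout);
+ * cls_rule_from_flow(flow, &rule->wr.cr);
+ * wx_rule_insert(wx, rule, NULL, 0);
+ */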
+
+/* Executes the actions indicated by 'rule' on 'packet', which is in flow
+ * 'flow' and is considered to have arrived on XFLOW port 'in_port'.
+ *
+ * The flow that 'packet' actually contains does not need to actually match
+ * 'rule'; the actions in 'rule' will be applied to it either way. Likewise,
+ * the packet and byte counters for 'rule' will be credited for the packet sent
+ * out whether or not the packet actually matches 'rule'.
+ *
+ * If 'rule' is an exact-match rule and 'flow' actually equals the rule's flow,
+ * the caller must already have accurately composed XFLOW actions for it given
+ * 'packet' using wx_rule_make_actions(). If 'rule' is a wildcard rule, or if
+ * 'rule' is an exact-match rule but 'flow' is not the rule's flow, then this
+ * function will compose a set of XFLOW actions based on 'rule''s OpenFlow
+ * actions and apply them to 'packet'. */
+static void
+wx_rule_execute(struct wx *wx, struct wx_rule *rule,
+ struct ofpbuf *packet, const flow_t *flow)
+{
+ const union xflow_action *actions;
+ size_t n_actions;
+ struct xflow_actions a;
+
+ /* Grab or compose the XFLOW actions.
+ *
+ * The special case for an exact-match 'rule' where 'flow' is not the
+ * rule's flow is important to avoid, e.g., sending a packet out its input
+ * port simply because the XFLOW actions were composed for the wrong
+ * scenario. */
+ if (rule->wr.cr.flow.wildcards
+ || !flow_equal(flow, &rule->wr.cr.flow))
+ {
+ struct wx_rule *super = rule->super ? rule->super : rule;
+ if (wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, flow,
+ packet, &a, NULL)) {
+ return;
+ }
+ actions = a.actions;
+ n_actions = a.n_actions;
+ } else {
+ actions = rule->xflow_actions;
+ n_actions = rule->n_xflow_actions;
+ }
+
+ /* Execute the XFLOW actions. */
+ if (!xfif_execute(wx->xfif, flow->in_port,
+ actions, n_actions, packet)) {
+ struct xflow_flow_stats stats;
+ flow_extract_stats(flow, packet, &stats);
+ wx_rule_update_stats(wx, rule, &stats);
+ rule->used = time_msec();
+ //XXX netflow_flow_update_time(wx->netflow, &rule->nf_flow, rule->used);
+ }
+}
+
+static void
+wx_rule_insert(struct wx *wx, struct wx_rule *rule, struct ofpbuf *packet,
+ uint16_t in_port)
+{
+ struct wx_rule *displaced_rule;
+
+ /* Insert the rule in the classifier. */
+ displaced_rule = wx_rule_cast(classifier_insert(&wx->cls, &rule->wr.cr));
+ if (!rule->wr.cr.flow.wildcards) {
+ wx_rule_make_actions(wx, rule, packet);
+ }
+
+ /* Send the packet and credit it to the rule. */
+ if (packet) {
+ flow_t flow;
- flow_extract(packet, in_port, &flow);
++ flow_extract(packet, 0, in_port, &flow);
+ wx_rule_execute(wx, rule, packet, &flow);
+ }
+
+ /* Install the rule in the datapath only after sending the packet, to
+ * avoid packet reordering. */
+ if (rule->wr.cr.flow.wildcards) {
+ COVERAGE_INC(wx_add_wc_flow);
+ wx->need_revalidate = true;
+ } else {
+ wx_rule_install(wx, rule, displaced_rule);
+ }
+
+ /* Free the rule that was displaced, if any. */
+ if (displaced_rule) {
+ rule->wr.client_data = displaced_rule->wr.client_data;
+ wx_rule_destroy(wx, displaced_rule);
+ }
+}
+
+static struct wx_rule *
+wx_rule_create_subrule(struct wx *wx, struct wx_rule *rule, const flow_t *flow)
+{
+ struct wx_rule *subrule;
+
+ subrule = wx_rule_create(rule, NULL, 0,
+ rule->wr.idle_timeout,
+ rule->wr.hard_timeout);
+ COVERAGE_INC(wx_subrule_create);
- cls_rule_from_flow(&subrule->wr.cr, flow);
++ cls_rule_from_flow(flow, &subrule->wr.cr);
+ classifier_insert_exact(&wx->cls, &subrule->wr.cr);
+
+ return subrule;
+}
+
+/* Returns true if the actions changed, false otherwise. */
+static bool
+wx_rule_make_actions(struct wx *wx, struct wx_rule *rule,
+ const struct ofpbuf *packet)
+{
+ const struct wx_rule *super;
+ struct xflow_actions a;
+ size_t actions_len;
+
+ assert(!rule->wr.cr.flow.wildcards);
+
+ super = rule->super ? rule->super : rule;
+ wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions,
+ &rule->wr.cr.flow, packet, &a, &rule->may_install);
+
+ actions_len = a.n_actions * sizeof *a.actions;
+ if (rule->n_xflow_actions != a.n_actions
+ || memcmp(rule->xflow_actions, a.actions, actions_len)) {
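+ /* This counter is bumped when the actions *did* change, despite its
+ * name. */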
+ COVERAGE_INC(wx_xflow_unchanged);
+ free(rule->xflow_actions);
+ rule->n_xflow_actions = a.n_actions;
+ rule->xflow_actions = xmemdup(a.actions, actions_len);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static int
+do_put_flow(struct wx *wx, struct wx_rule *rule, int flags,
+ struct xflow_flow_put *put)
+{
+ memset(&put->flow.stats, 0, sizeof put->flow.stats);
+ xflow_key_from_flow(&put->flow.key, &rule->wr.cr.flow);
+ put->flow.actions = rule->xflow_actions;
+ put->flow.n_actions = rule->n_xflow_actions;
+ put->flow.flags = 0;
+ put->flags = flags;
+ return xfif_flow_put(wx->xfif, put);
+}
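+
+/* do_put_flow() always passes XFLOWPF_CREATE | XFLOWPF_MODIFY, so the
+ * datapath either installs a fresh exact-match entry or overwrites an
+ * existing one. When XFLOWPF_ZERO_STATS is added (as in wx_rule_install())
+ * the kernel counters are also reset, and the statistics returned in
+ * put->flow.stats are credited to the displaced rule, if any. */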
+
+static void
+wx_rule_install(struct wx *wx, struct wx_rule *rule, struct wx_rule *displaced_rule)
+{
+ assert(!rule->wr.cr.flow.wildcards);
+
+ if (rule->may_install) {
+ struct xflow_flow_put put;
+ if (!do_put_flow(wx, rule,
+ XFLOWPF_CREATE | XFLOWPF_MODIFY | XFLOWPF_ZERO_STATS,
+ &put)) {
+ rule->installed = true;
+ if (displaced_rule) {
+ wx_rule_update_stats(wx, displaced_rule, &put.flow.stats);
+ wx_rule_post_uninstall(wx, displaced_rule);
+ }
+ }
+ } else if (displaced_rule) {
+ wx_rule_uninstall(wx, displaced_rule);
+ }
+}
+
+static void
+wx_rule_reinstall(struct wx *wx, struct wx_rule *rule)
+{
+ if (rule->installed) {
+ struct xflow_flow_put put;
+ COVERAGE_INC(wx_dp_missed);
+ do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY, &put);
+ } else {
+ wx_rule_install(wx, rule, NULL);
+ }
+}
+
+static void
+wx_rule_update_actions(struct wx *wx, struct wx_rule *rule)
+{
+ bool actions_changed;
+#if 0
+ uint16_t new_out_iface, old_out_iface;
+
+ old_out_iface = rule->nf_flow.output_iface;
+#endif
+ actions_changed = wx_rule_make_actions(wx, rule, NULL);
+
+ if (rule->may_install) {
+ if (rule->installed) {
+ if (actions_changed) {
+ struct xflow_flow_put put;
+ do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY
+ | XFLOWPF_ZERO_STATS, &put);
+ wx_rule_update_stats(wx, rule, &put.flow.stats);
+#if 0
+ /* Temporarily set the old output iface so that NetFlow
+ * messages have the correct output interface for the old
+ * stats. */
+ new_out_iface = rule->nf_flow.output_iface;
+ rule->nf_flow.output_iface = old_out_iface;
+#endif
+ wx_rule_post_uninstall(wx, rule);
+ //rule->nf_flow.output_iface = new_out_iface;
+ }
+ } else {
+ wx_rule_install(wx, rule, NULL);
+ }
+ } else {
+ wx_rule_uninstall(wx, rule);
+ }
+}
+\f
+static void
+add_output_group_action(struct xflow_actions *actions, uint16_t group,
+ uint16_t *nf_output_iface)
+{
+ xflow_actions_add(actions, XFLOWAT_OUTPUT_GROUP)->output_group.group = group;
+
+ if (group == WX_GROUP_ALL || group == WX_GROUP_FLOOD) {
+ *nf_output_iface = NF_OUT_FLOOD;
+ }
+}
+
+static void
+add_controller_action(struct xflow_actions *actions,
+ const struct ofp_action_output *oao)
+{
+ union xflow_action *a = xflow_actions_add(actions, XFLOWAT_CONTROLLER);
- a->controller.arg = oao->max_len ? ntohs(oao->max_len) : UINT32_MAX;
++ a->controller.arg = ntohs(oao->max_len);
+}
+
+struct wx_xlate_ctx {
+ /* Input. */
- const flow_t *flow; /* Flow to which these actions correspond. */
++ flow_t flow; /* Flow to which these actions correspond. */
+ int recurse; /* Recursion level, via xlate_table_action. */
+ struct wx *wx;
+ const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a
+ * null pointer if we are revalidating
+ * without a packet to refer to. */
+
+ /* Output. */
+ struct xflow_actions *out; /* Datapath actions. */
+ //tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */
+ bool may_set_up_flow; /* True ordinarily; false if the actions must
+ * be reassessed for every packet. */
+ uint16_t nf_output_iface; /* Output interface index for NetFlow. */
+};
+
+static void do_xlate_actions(const union ofp_action *in, size_t n_in,
+ struct wx_xlate_ctx *ctx);
+
+static void
+add_output_action(struct wx_xlate_ctx *ctx, uint16_t port)
+{
+ const struct wdp_port *wdp_port = port_array_get(&ctx->wx->ports, port);
+
+ if (wdp_port) {
+ if (wdp_port->opp.config & OFPPC_NO_FWD) {
+ /* Forwarding disabled on port. */
+ return;
+ }
+ } else {
+ /*
+ * We don't have an ofport record for this port, but it doesn't hurt to
+ * allow forwarding to it anyhow. Maybe such a port will appear later
+ * and we're pre-populating the flow table.
+ */
+ }
+
+ xflow_actions_add(ctx->out, XFLOWAT_OUTPUT)->output.port = port;
+ //ctx->nf_output_iface = port;
+}
+
+static struct wx_rule *
+wx_rule_lookup_valid(struct wx *wx, const flow_t *flow)
+{
+ struct wx_rule *rule = wx_rule_cast(classifier_lookup(&wx->cls, flow));
+
+ /* The rule we found might not be valid, since we could be in need of
+ * revalidation. If it is not valid, don't return it. */
+ if (rule
+ && rule->super
+ && wx->need_revalidate
+ && !wx_rule_revalidate(wx, rule)) {
+ COVERAGE_INC(wx_invalidated);
+ return NULL;
+ }
+
+ return rule;
+}
+
+static void
+xlate_table_action(struct wx_xlate_ctx *ctx, uint16_t in_port)
+{
+ if (!ctx->recurse) {
++ uint16_t old_in_port;
+ struct wx_rule *rule;
- flow_t flow;
- flow = *ctx->flow;
- flow.in_port = in_port;
- rule = wx_rule_lookup_valid(ctx->wx, &flow);
+
++ /* Look up a flow with 'in_port' as the input port. Then restore the
++ * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
++ * have surprising behavior). */
++ old_in_port = ctx->flow.in_port;
++ ctx->flow.in_port = in_port;
++ rule = wx_rule_lookup_valid(ctx->wx, &ctx->flow);
++ ctx->flow.in_port = old_in_port;
+
+ if (rule) {
+ if (rule->super) {
+ rule = rule->super;
+ }
+
+ ctx->recurse++;
+ do_xlate_actions(rule->wr.actions, rule->wr.n_actions, ctx);
+ ctx->recurse--;
+ }
+ }
+}
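+
+/* Because of the !ctx->recurse check above, resubmission is limited to one
+ * level: the actions of a rule reached through NXAST_RESUBMIT cannot
+ * themselves resubmit. */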
+
+static void
+xlate_output_action(struct wx_xlate_ctx *ctx,
+ const struct ofp_action_output *oao)
+{
+ uint16_t xflow_port;
+ uint16_t prev_nf_output_iface = ctx->nf_output_iface;
+
+ ctx->nf_output_iface = NF_OUT_DROP;
+
+ switch (ntohs(oao->port)) {
+ case OFPP_IN_PORT:
- add_output_action(ctx, ctx->flow->in_port);
++ add_output_action(ctx, ctx->flow.in_port);
+ break;
+ case OFPP_TABLE:
- xlate_table_action(ctx, ctx->flow->in_port);
++ xlate_table_action(ctx, ctx->flow.in_port);
+ break;
+ case OFPP_NORMAL:
+#if 0
+ if (!ctx->wx->ofhooks->normal_cb(ctx->flow, ctx->packet,
+ ctx->out, ctx->tags,
+ &ctx->nf_output_iface,
+ ctx->wx->aux)) {
+ COVERAGE_INC(wx_uninstallable);
+ ctx->may_set_up_flow = false;
+ }
+ break;
+#else
+ /* fall through to flood for now */
+#endif
+ case OFPP_FLOOD:
+ add_output_group_action(ctx->out, WX_GROUP_FLOOD,
+ &ctx->nf_output_iface);
+ break;
+ case OFPP_ALL:
+ add_output_group_action(ctx->out, WX_GROUP_ALL, &ctx->nf_output_iface);
+ break;
+ case OFPP_CONTROLLER:
+ add_controller_action(ctx->out, oao);
+ break;
+ case OFPP_LOCAL:
+ add_output_action(ctx, XFLOWP_LOCAL);
+ break;
+ default:
+ xflow_port = ofp_port_to_xflow_port(ntohs(oao->port));
- if (xflow_port != ctx->flow->in_port) {
++ if (xflow_port != ctx->flow.in_port) {
+ add_output_action(ctx, xflow_port);
+ }
+ break;
+ }
+
+ if (prev_nf_output_iface == NF_OUT_FLOOD) {
+ ctx->nf_output_iface = NF_OUT_FLOOD;
+ } else if (ctx->nf_output_iface == NF_OUT_DROP) {
+ ctx->nf_output_iface = prev_nf_output_iface;
+ } else if (prev_nf_output_iface != NF_OUT_DROP &&
+ ctx->nf_output_iface != NF_OUT_FLOOD) {
+ ctx->nf_output_iface = NF_OUT_MULTI;
+ }
+}
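+
+/* The nf_output_iface bookkeeping above collapses a sequence of outputs into
+ * one NetFlow value: a single port keeps that port's index, a second
+ * distinct port yields NF_OUT_MULTI, and a flood (NF_OUT_FLOOD) takes
+ * precedence over everything else. */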
+
+static void
+xlate_nicira_action(struct wx_xlate_ctx *ctx,
+ const struct nx_action_header *nah)
+{
+ const struct nx_action_resubmit *nar;
++ const struct nx_action_set_tunnel *nast;
++ union xflow_action *oa;
+ int subtype = ntohs(nah->subtype);
+
+ assert(nah->vendor == htonl(NX_VENDOR_ID));
+ switch (subtype) {
+ case NXAST_RESUBMIT:
+ nar = (const struct nx_action_resubmit *) nah;
+ xlate_table_action(ctx, ofp_port_to_xflow_port(ntohs(nar->in_port)));
+ break;
+
++ case NXAST_SET_TUNNEL:
++ nast = (const struct nx_action_set_tunnel *) nah;
++ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TUNNEL);
++ ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id;
++ break;
++
++ /* If you add a new action here that modifies flow data, don't forget to
++ * update the flow key in ctx->flow in the same way. */
++
+ default:
+ VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype);
+ break;
+ }
+}
+
+static void
+do_xlate_actions(const union ofp_action *in, size_t n_in,
+ struct wx_xlate_ctx *ctx)
+{
+ struct actions_iterator iter;
+ const union ofp_action *ia;
+ const struct wdp_port *port;
+
- port = port_array_get(&ctx->wx->ports, ctx->flow->in_port);
++ port = port_array_get(&ctx->wx->ports, ctx->flow.in_port);
+ if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
- port->opp.config & (eth_addr_equals(ctx->flow->dl_dst, stp_eth_addr)
++ port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr)
+ ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) {
+ /* Drop this flow. */
+ return;
+ }
+
+ for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) {
+ uint16_t type = ntohs(ia->type);
+ union xflow_action *oa;
+
+ switch (type) {
+ case OFPAT_OUTPUT:
+ xlate_output_action(ctx, &ia->output);
+ break;
+
+ case OFPAT_SET_VLAN_VID:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI);
+ oa->dl_tci.tci = ia->vlan_vid.vlan_vid & htons(VLAN_VID_MASK);
+ oa->dl_tci.mask = htons(VLAN_VID_MASK);
++ ctx->flow.dl_vlan = ia->vlan_vid.vlan_vid;
+ break;
+
+ case OFPAT_SET_VLAN_PCP:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI);
+ oa->dl_tci.tci = htons((ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT)
+ & VLAN_PCP_MASK);
+ oa->dl_tci.mask = htons(VLAN_PCP_MASK);
++
++ if (ctx->flow.dl_vlan == htons(OFP_VLAN_NONE)) {
++ ctx->flow.dl_vlan = htons(0);
++ }
++ ctx->flow.dl_vlan_pcp = ia->vlan_pcp.vlan_pcp;
+ break;
+
+ case OFPAT_STRIP_VLAN:
+ xflow_actions_add(ctx->out, XFLOWAT_STRIP_VLAN);
++ ctx->flow.dl_vlan = htons(OFP_VLAN_NONE);
++ ctx->flow.dl_vlan_pcp = 0;
+ break;
+
+ case OFPAT_SET_DL_SRC:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_SRC);
+ memcpy(oa->dl_addr.dl_addr,
+ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
++ memcpy(ctx->flow.dl_src,
++ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
+ break;
+
+ case OFPAT_SET_DL_DST:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_DST);
+ memcpy(oa->dl_addr.dl_addr,
+ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
++ memcpy(ctx->flow.dl_dst,
++ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN);
+ break;
+
+ case OFPAT_SET_NW_SRC:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_SRC);
- oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
++ ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
+ break;
+
+ case OFPAT_SET_NW_DST:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_DST);
- oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
++ ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr;
+ break;
+
+ case OFPAT_SET_NW_TOS:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_TOS);
- oa->nw_tos.nw_tos = ia->nw_tos.nw_tos;
++ ctx->flow.nw_tos = oa->nw_tos.nw_tos = ia->nw_tos.nw_tos;
+ break;
+
+ case OFPAT_SET_TP_SRC:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_SRC);
- oa->tp_port.tp_port = ia->tp_port.tp_port;
++ ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port;
+ break;
+
+ case OFPAT_SET_TP_DST:
+ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_DST);
- oa->tp_port.tp_port = ia->tp_port.tp_port;
++ ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port;
+ break;
+
+ case OFPAT_VENDOR:
+ xlate_nicira_action(ctx, (const struct nx_action_header *) ia);
+ break;
+
+ default:
+ VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type);
+ break;
+ }
+ }
+}
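+
+/* Every OFPAT_SET_* case above mirrors its modification into ctx->flow, so
+ * that a later NXAST_RESUBMIT looks up the flow table against the rewritten
+ * header fields rather than the packet's original ones. */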
+
+/* Returns true if 'flow' and 'actions' may be set up as a flow in the kernel.
+ * This is true most of the time, but we don't allow flows that would prevent
+ * DHCP replies from being seen by the local port to be set up in the
+ * kernel.
+ *
+ * We only need this, strictly speaking, when in-band control is turned on. */
+static bool
+wx_may_set_up(const flow_t *flow, const struct xflow_actions *actions)
+{
+ if (flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IP_TYPE_UDP
+ && flow->tp_src == htons(DHCP_SERVER_PORT)
+ && flow->tp_dst == htons(DHCP_CLIENT_PORT)) {
+ int i;
+
+ for (i = 0; i < actions->n_actions; i++) {
+ const struct xflow_action_output *oao = &actions->actions[i].output;
+ if (oao->type == XFLOWAT_OUTPUT && oao->port == XFLOWP_LOCAL) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ return true;
+}
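+
+/* For example, a flow carrying DHCP server-to-client replies (UDP source
+ * port 67, destination port 68) may be set up in the kernel only if one of
+ * its actions already outputs to XFLOWP_LOCAL; any other such flow keeps
+ * coming to userspace so that the local port cannot be cut off from DHCP
+ * replies. */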
+
+static int
+wx_xlate_actions(struct wx *wx, const union ofp_action *in, size_t n_in,
+ const flow_t *flow, const struct ofpbuf *packet,
+ struct xflow_actions *out, bool *may_set_up_flow)
+{
+ //tag_type no_tags = 0;
+ struct wx_xlate_ctx ctx;
+ COVERAGE_INC(wx_ofp2xflow);
+ xflow_actions_init(out);
- ctx.flow = flow;
++ ctx.flow = *flow;
+ ctx.recurse = 0;
+ ctx.wx = wx;
+ ctx.packet = packet;
+ ctx.out = out;
+ //ctx.tags = tags ? tags : &no_tags;
+ ctx.may_set_up_flow = true;
+ ctx.nf_output_iface = NF_OUT_DROP;
+ do_xlate_actions(in, n_in, &ctx);
+
+ if (may_set_up_flow) {
+ *may_set_up_flow = ctx.may_set_up_flow && wx_may_set_up(flow, out);
+ }
+#if 0
+ if (nf_output_iface) {
+ *nf_output_iface = ctx.nf_output_iface;
+ }
+#endif
+ if (xflow_actions_overflow(out)) {
+ xflow_actions_init(out);
+ return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY);
+ }
+ return 0;
+}
+\f
+static void
+update_used(struct wx *wx)
+{
+ struct xflow_flow *flows;
+ size_t n_flows;
+ size_t i;
+ int error;
+
+ error = xfif_flow_list_all(wx->xfif, &flows, &n_flows);
+ if (error) {
+ return;
+ }
+
+ for (i = 0; i < n_flows; i++) {
+ struct xflow_flow *f = &flows[i];
+ struct wx_rule *rule;
+ flow_t flow;
+
+ xflow_key_to_flow(&f->key, &flow);
+ rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, &flow));
+ if (!rule || !rule->installed) {
+ COVERAGE_INC(wx_unexpected_rule);
+ xfif_flow_del(wx->xfif, f);
+ continue;
+ }
+
+ wx_rule_update_time(wx, rule, &f->stats);
+ wx_rule_account(wx, rule, f->stats.n_bytes);
+ }
+ free(flows);
+}
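+
+/* update_used() runs once per second from wx_run_one(), just before the
+ * expiration sweep, so that each rule's 'used' time reflects recent
+ * datapath activity when idle timeouts are evaluated. */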
+
+static void
+uninstall_idle_flow(struct wx *wx, struct wx_rule *rule)
+{
+ assert(rule->installed);
+ assert(!rule->wr.cr.flow.wildcards);
+
+ if (rule->super) {
+ wx_rule_remove(wx, rule);
+ } else {
+ wx_rule_uninstall(wx, rule);
+ }
+}
+
+static void
+expire_rule(struct cls_rule *cls_rule, void *wx_)
+{
+ struct wx *wx = wx_;
+ struct wx_rule *rule = wx_rule_cast(cls_rule);
+ long long int hard_expire, idle_expire, expire, now;
+
+ hard_expire = (rule->wr.hard_timeout
+ ? rule->wr.created + rule->wr.hard_timeout * 1000
+ : LLONG_MAX);
+ idle_expire = (rule->wr.idle_timeout
+ && (rule->super || list_is_empty(&rule->list))
+ ? rule->used + rule->wr.idle_timeout * 1000
+ : LLONG_MAX);
+ expire = MIN(hard_expire, idle_expire);
+
+ now = time_msec();
+ if (now < expire) {
+ if (rule->installed && now >= rule->used + 5000) {
+ uninstall_idle_flow(wx, rule);
+ } else if (!rule->wr.cr.flow.wildcards) {
+ //XXX active_timeout(wx, rule);
+ }
+
+ return;
+ }
+
+ COVERAGE_INC(wx_expired);
+
+ /* Update stats. This code will be a no-op if the rule expired
+ * due to an idle timeout. */
+ if (rule->wr.cr.flow.wildcards) {
+ struct wx_rule *subrule, *next;
+ LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) {
+ wx_rule_remove(wx, subrule);
+ }
+ } else {
+ wx_rule_uninstall(wx, rule);
+ }
+
+#if 0 /* XXX */
+ if (!wx_rule_is_hidden(rule)) {
+ send_flow_removed(wx, rule, now,
+ (now >= hard_expire
+ ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT));
+ }
+#endif
+ wx_rule_remove(wx, rule);
+}
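+
+/* Worked example: a rule with idle_timeout 60 and hard_timeout 0 has
+ * hard_expire = LLONG_MAX and idle_expire = used + 60000 ms. Before that
+ * deadline, if the rule is installed but idle for 5 s it is evicted from
+ * the datapath by uninstall_idle_flow() (a subrule is removed outright);
+ * once the deadline passes it is removed from the classifier as well. */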
+
+struct revalidate_cbdata {
+ struct wx *wx;
+ bool revalidate_all; /* Revalidate all exact-match rules? */
+ bool revalidate_subrules; /* Revalidate all exact-match subrules? */
+ //struct tag_set revalidate_set; /* Set of tags to revalidate. */
+};
+
+static bool
+revalidate_rule(struct wx *wx, struct wx_rule *rule)
+{
+ const flow_t *flow = &rule->wr.cr.flow;
+
+ COVERAGE_INC(wx_revalidate_rule);
+ if (rule->super) {
+ struct wx_rule *super;
+ super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow));
+ if (!super) {
+ wx_rule_remove(wx, rule);
+ return false;
+ } else if (super != rule->super) {
+ COVERAGE_INC(wx_revalidate_moved);
+ list_remove(&rule->list);
+ list_push_back(&super->list, &rule->list);
+ rule->super = super;
+ rule->wr.hard_timeout = super->wr.hard_timeout;
+ rule->wr.idle_timeout = super->wr.idle_timeout;
+ rule->wr.created = super->wr.created;
+ rule->used = 0;
+ }
+ }
+
+ wx_rule_update_actions(wx, rule);
+ return true;
+}
+
+static void
+revalidate_cb(struct cls_rule *sub_, void *cbdata_)
+{
+ struct wx_rule *sub = wx_rule_cast(sub_);
+ struct revalidate_cbdata *cbdata = cbdata_;
+
+ if (cbdata->revalidate_all
+ || (cbdata->revalidate_subrules && sub->super)
+ /*|| (tag_set_intersects(&cbdata->revalidate_set, sub->tags))*/) {
+ revalidate_rule(cbdata->wx, sub);
+ }
+}
+
+static void
+wx_run_one(struct wx *wx)
+{
+ wx_port_run(wx);
+
+ if (time_msec() >= wx->next_expiration) {
+ COVERAGE_INC(wx_expiration);
+ wx->next_expiration = time_msec() + 1000;
+ update_used(wx);
+
+ classifier_for_each(&wx->cls, CLS_INC_ALL, expire_rule, wx);
+
+ /* XXX account_checkpoint_cb */
+ }
+
+ if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) {
+ struct revalidate_cbdata cbdata;
+ cbdata.wx = wx;
+ cbdata.revalidate_all = false;
+ cbdata.revalidate_subrules = wx->need_revalidate;
+ //cbdata.revalidate_set = wx->revalidate_set;
+ //tag_set_init(&wx->revalidate_set);
+ COVERAGE_INC(wx_revalidate);
+ classifier_for_each(&wx->cls, CLS_INC_EXACT, revalidate_cb, &cbdata);
+ wx->need_revalidate = false;
+ }
+}
+
+static void
+wx_run(void)
+{
+ struct wx *wx;
+
+ LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) {
+ wx_run_one(wx);
+ }
+ xf_run();
+}
+
+static void
+wx_wait_one(struct wx *wx)
+{
+ xfif_port_poll_wait(wx->xfif);
+ netdev_monitor_poll_wait(wx->netdev_monitor);
+ if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) {
+ poll_immediate_wake();
+ } else if (wx->next_expiration != LLONG_MAX) {
+ poll_timer_wait(wx->next_expiration - time_msec());
+ }
+}
+
+static void
+wx_wait(void)
+{
+ struct wx *wx;
+
+ LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) {
+ wx_wait_one(wx);
+ }
+ xf_wait();
+}
+\f
+static int wx_flow_flush(struct wdp *);
+
+static int
+wx_enumerate(const struct wdp_class *wdp_class, struct svec *all_wdps)
+{
+ struct svec names = SVEC_EMPTY_INITIALIZER;
+ int error = xf_enumerate_names(wdp_class->type, &names);
+ svec_move(all_wdps, &names);
+ return error;
+}
+
+static int
+wx_open(const struct wdp_class *wdp_class, const char *name, bool create,
+ struct wdp **wdpp)
+{
+ struct xfif *xfif;
+ int error;
+
+ error = (create
+ ? xfif_create_and_open(name, wdp_class->type, &xfif)
+ : xfif_open(name, wdp_class->type, &xfif));
+ if (!error) {
+ struct wx *wx;
+
- wx = xmalloc(sizeof *wx);
++ wx = xzalloc(sizeof *wx);
+ list_push_back(&all_wx, &wx->list_node);
+ wdp_init(&wx->wdp, wdp_class, name, 0, 0);
+ wx->xfif = xfif;
+ classifier_init(&wx->cls);
+ wx->netdev_monitor = netdev_monitor_create();
+ port_array_init(&wx->ports);
+ shash_init(&wx->port_by_name);
+ wx->next_expiration = time_msec() + 1000;
+
+ wx_port_init(wx);
+
+ *wdpp = &wx->wdp;
+ }
+
+ return error;
+}
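+
+/* Illustrative call sequence, using a hypothetical wdp class and datapath
+ * name (error handling omitted):
+ *
+ * struct wdp *wdp;
+ * wx_open(&some_wdp_class, "xf0", true, &wdp);
+ * ...
+ * wx_close(wdp);
+ */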
+
+static void
+wx_close(struct wdp *wdp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ wx_flow_flush(wdp);
+ xfif_close(wx->xfif);
+ classifier_destroy(&wx->cls);
+ netdev_monitor_destroy(wx->netdev_monitor);
+ list_remove(&wx->list_node);
+ free(wx);
+}
+
+static int
+wx_get_all_names(const struct wdp *wdp, struct svec *all_names)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_get_all_names(wx->xfif, all_names);
+}
+
+static int
+wx_destroy(struct wdp *wdp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_delete(wx->xfif);
+}
+
+static void
+hton_ofp_phy_port(struct ofp_phy_port *opp)
+{
+ opp->port_no = htons(opp->port_no);
+ opp->config = htonl(opp->config);
+ opp->state = htonl(opp->state);
+ opp->curr = htonl(opp->curr);
+ opp->advertised = htonl(opp->advertised);
+ opp->supported = htonl(opp->supported);
+ opp->peer = htonl(opp->peer);
+}
+
+static int
+wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct ofp_switch_features *osf;
+ struct ofpbuf *buf;
+ unsigned int port_no;
+ struct wdp_port *port;
+
+ buf = ofpbuf_new(sizeof *osf);
+ osf = ofpbuf_put_zeros(buf, sizeof *osf);
+ osf->n_tables = 2;
+ osf->capabilities = htonl(OFPC_ARP_MATCH_IP);
+ osf->actions = htonl((1u << OFPAT_OUTPUT) |
+ (1u << OFPAT_SET_VLAN_VID) |
+ (1u << OFPAT_SET_VLAN_PCP) |
+ (1u << OFPAT_STRIP_VLAN) |
+ (1u << OFPAT_SET_DL_SRC) |
+ (1u << OFPAT_SET_DL_DST) |
+ (1u << OFPAT_SET_NW_SRC) |
+ (1u << OFPAT_SET_NW_DST) |
+ (1u << OFPAT_SET_NW_TOS) |
+ (1u << OFPAT_SET_TP_SRC) |
+ (1u << OFPAT_SET_TP_DST));
+
+ PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
+ hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
+ }
+
+ *featuresp = buf;
+ return 0;
+}
+
+static void
+count_subrules(struct cls_rule *cls_rule, void *n_subrules_)
+{
+ struct wx_rule *rule = wx_rule_cast(cls_rule);
+ int *n_subrules = n_subrules_;
+
+ if (rule->super) {
+ (*n_subrules)++;
+ }
+}
+
+static int
+wx_get_stats(const struct wdp *wdp, struct wdp_stats *stats)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct xflow_stats xflow_stats;
+ int n_subrules;
+ int error;
+
+ error = xfif_get_xf_stats(wx->xfif, &xflow_stats);
+
+ n_subrules = 0;
+ classifier_for_each(&wx->cls, CLS_INC_EXACT, count_subrules, &n_subrules);
+
+ stats->exact.n_flows = classifier_count_exact(&wx->cls) - n_subrules;
+ stats->exact.cur_capacity = xflow_stats.cur_capacity;
+ stats->exact.max_capacity = MIN(WX_MAX_EXACT, xflow_stats.max_capacity);
+ stats->exact.n_hit = xflow_stats.n_hit;
+ stats->exact.n_missed = xflow_stats.n_missed;
+ stats->exact.n_lost = xflow_stats.n_lost;
+
+ stats->wild.n_flows = classifier_count_wild(&wx->cls);
+ stats->wild.cur_capacity = WX_MAX_WILD;
+ stats->wild.max_capacity = WX_MAX_WILD;
+ stats->wild.n_hit = 0; /* XXX */
+ stats->wild.n_missed = 0; /* XXX */
+ stats->wild.n_lost = 0; /* XXX */
+
+ stats->n_ports = xflow_stats.n_ports;
+ stats->max_ports = xflow_stats.max_ports;
+
+ stats->n_frags = xflow_stats.n_frags;
+
+ stats->max_miss_queue = xflow_stats.max_miss_queue;
+ stats->max_action_queue = xflow_stats.max_action_queue;
+ stats->max_sflow_queue = xflow_stats.max_sflow_queue;
+
+ return error;
+}
+
+static int
+wx_get_drop_frags(const struct wdp *wdp, bool *drop_frags)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_get_drop_frags(wx->xfif, drop_frags);
+}
+
+static int
+wx_set_drop_frags(struct wdp *wdp, bool drop_frags)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_set_drop_frags(wx->xfif, drop_frags);
+}
+
+static int
+wx_port_add(struct wdp *wdp, const char *devname,
+ bool internal, uint16_t *port_no)
+{
+ struct wx *wx = wx_cast(wdp);
+ uint16_t xflow_flags = internal ? XFLOW_PORT_INTERNAL : 0;
+ return xfif_port_add(wx->xfif, devname, xflow_flags, port_no);
+}
+
+static int
+wx_port_del(struct wdp *wdp, uint16_t port_no)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_port_del(wx->xfif, port_no);
+}
+
+static int
+wx_answer_port_query(const struct wdp_port *port, struct wdp_port *portp)
+{
+ if (port) {
+ wdp_port_copy(portp, port);
+ return 0;
+ } else {
+ return ENOENT;
+ }
+}
+
+static int
+wx_port_query_by_number(const struct wdp *wdp, uint16_t port_no,
+ struct wdp_port *portp)
+{
+ struct wx *wx = wx_cast(wdp);
+ const struct wdp_port *port;
+
+ port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no));
+ return wx_answer_port_query(port, portp);
+}
+
+static int
+wx_port_query_by_name(const struct wdp *wdp, const char *devname,
+ struct wdp_port *portp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return wx_answer_port_query(shash_find_data(&wx->port_by_name, devname),
+ portp);
+}
+
+static int
+wx_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wdp_port *port;
+ uint32_t changes;
+
+ port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no));
+ if (!port) {
+ return ENOENT;
+ }
+ changes = config ^ port->opp.config;
+
+ if (changes & OFPPC_PORT_DOWN) {
+ int error;
+ if (config & OFPPC_PORT_DOWN) {
+ error = netdev_turn_flags_off(port->netdev, NETDEV_UP, true);
+ } else {
+ error = netdev_turn_flags_on(port->netdev, NETDEV_UP, true);
+ }
+ if (!error) {
+ port->opp.config ^= OFPPC_PORT_DOWN;
+ }
+ }
+
+#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD)
+ if (changes & REVALIDATE_BITS) {
+ COVERAGE_INC(wx_costly_flags);
+ port->opp.config ^= changes & REVALIDATE_BITS;
+ wx->need_revalidate = true;
+ }
+#undef REVALIDATE_BITS
+
+ if (changes & OFPPC_NO_FLOOD) {
+ port->opp.config ^= OFPPC_NO_FLOOD;
+ wx_port_refresh_groups(wx);
+ }
+
+ if (changes & OFPPC_NO_PACKET_IN) {
+ port->opp.config ^= OFPPC_NO_PACKET_IN;
+ }
+
+ return 0;
+}
+
+static int
+wx_port_list(const struct wdp *wdp, struct wdp_port **portsp, size_t *n_portsp)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wdp_port *ports, *port;
+ unsigned int port_no;
+ size_t n_ports, i;
+
+ *n_portsp = n_ports = port_array_count(&wx->ports);
+ *portsp = ports = xmalloc(n_ports * sizeof *ports);
+ i = 0;
+ PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
+ wdp_port_copy(&ports[i++], port);
+ }
+ assert(i == n_ports);
+
+ return 0;
+}
+
+static int
+wx_port_poll(const struct wdp *wdp, char **devnamep)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_port_poll(wx->xfif, devnamep);
+}
+
+static void
+wx_port_poll_wait(const struct wdp *wdp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ xfif_port_poll_wait(wx->xfif);
+}
+
+static struct wdp_rule *
+wx_flow_get(const struct wdp *wdp, const flow_t *flow)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_rule *rule;
+
+ rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, flow));
+ return rule && !wx_rule_is_hidden(rule) ? &rule->wr : NULL;
+}
+
+static struct wdp_rule *
+wx_flow_match(const struct wdp *wdp, const flow_t *flow)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_rule *rule;
+
+ rule = wx_rule_cast(classifier_lookup(&wx->cls, flow));
+ if (rule) {
+ if (wx_rule_is_hidden(rule)) {
+ rule = rule->super;
+ }
+ return &rule->wr;
+ } else {
+ return NULL;
+ }
+}
+
+struct wx_for_each_thunk_aux {
+ wdp_flow_cb_func *client_callback;
+ void *client_aux;
+};
+
+static void
+wx_for_each_thunk(struct cls_rule *cls_rule, void *aux_)
+{
+ struct wx_for_each_thunk_aux *aux = aux_;
+ struct wx_rule *rule = wx_rule_cast(cls_rule);
+
+ if (!wx_rule_is_hidden(rule)) {
+ aux->client_callback(&rule->wr, aux->client_aux);
+ }
+}
+
+static void
+wx_flow_for_each_match(const struct wdp *wdp, const flow_t *target,
+ int include,
+ wdp_flow_cb_func *client_callback, void *client_aux)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_for_each_thunk_aux aux;
+
+ aux.client_callback = client_callback;
+ aux.client_aux = client_aux;
+ classifier_for_each_match(&wx->cls, target, include,
+ wx_for_each_thunk, &aux);
+}
+
+/* Obtains statistic counters for 'rule' within 'wx' and stores them into
+ * '*stats'. If 'rule' is a wildcarded rule, the returned statistics include
+ * statistics for all of 'rule''s subrules. */
+static void
+query_stats(struct wx *wx, struct wx_rule *rule, struct wdp_flow_stats *stats)
+{
+ struct wx_rule *subrule;
+ struct xflow_flow *xflow_flows;
+ size_t n_xflow_flows;
+
+ /* Start from historical data for 'rule' itself that are no longer tracked
+ * by the datapath. This counts, for example, subrules that have
+ * expired. */
+ stats->n_packets = rule->packet_count;
+ stats->n_bytes = rule->byte_count;
+ stats->inserted = rule->wr.created;
+ stats->used = LLONG_MIN;
+ stats->tcp_flags = 0;
+ stats->ip_tos = 0;
+
+ /* Prepare to ask the datapath for statistics on 'rule', or if it is
+ * wildcarded then on all of its subrules.
+ *
+ * Also, add any statistics that are not tracked by the datapath for each
+ * subrule. This includes, for example, statistics for packets that were
+ * executed "by hand" by ofproto via xfif_execute() but must be accounted
+ * to a flow. */
+ n_xflow_flows = rule->wr.cr.flow.wildcards ? list_size(&rule->list) : 1;
+ xflow_flows = xzalloc(n_xflow_flows * sizeof *xflow_flows);
+ if (rule->wr.cr.flow.wildcards) {
+ size_t i = 0;
+ LIST_FOR_EACH (subrule, struct wx_rule, list, &rule->list) {
+ xflow_key_from_flow(&xflow_flows[i++].key, &subrule->wr.cr.flow);
+ stats->n_packets += subrule->packet_count;
+ stats->n_bytes += subrule->byte_count;
+ }
+ } else {
+ xflow_key_from_flow(&xflow_flows[0].key, &rule->wr.cr.flow);
+ }
+
+ /* Fetch up-to-date statistics from the datapath and add them in. */
+ if (!xfif_flow_get_multiple(wx->xfif, xflow_flows, n_xflow_flows)) {
+ size_t i;
+ for (i = 0; i < n_xflow_flows; i++) {
+ struct xflow_flow *xflow_flow = &xflow_flows[i];
+ long long int used;
+
+ stats->n_packets += xflow_flow->stats.n_packets;
+ stats->n_bytes += xflow_flow->stats.n_bytes;
+ used = xflow_flow_stats_to_msec(&xflow_flow->stats);
+ if (used > stats->used) {
+ stats->used = used;
+ if (xflow_flow->key.dl_type == htons(ETH_TYPE_IP)
+ && xflow_flow->key.nw_proto == IP_TYPE_TCP) {
+ stats->ip_tos = xflow_flow->stats.ip_tos;
+ }
+ }
+ stats->tcp_flags |= xflow_flow->stats.tcp_flags;
+ }
+ }
+ free(xflow_flows);
+}
+
+static int
+wx_flow_get_stats(const struct wdp *wdp,
+ const struct wdp_rule *wdp_rule,
+ struct wdp_flow_stats *stats)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr);
+
+ query_stats(wx, rule, stats);
+ return 0;
+}
+
+static bool
+wx_flow_overlaps(const struct wdp *wdp, const flow_t *flow)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ /* XXX overlap with a subrule? */
+ return classifier_rule_overlaps(&wx->cls, flow);
+}
+
+static int
+wx_flow_put(struct wdp *wdp, const struct wdp_flow_put *put,
+ struct wdp_flow_stats *old_stats, struct wdp_rule **rulep)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_rule *rule;
+
+ rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, put->flow));
+ if (rule && wx_rule_is_hidden(rule)) {
+ rule = NULL;
+ }
+
+ if (rule) {
+ if (!(put->flags & WDP_PUT_MODIFY)) {
+ return EEXIST;
+ }
+ } else {
+ if (!(put->flags & WDP_PUT_CREATE)) {
+ return EINVAL;
+ }
+ if ((put->flow->wildcards
+ ? classifier_count_wild(&wx->cls) >= WX_MAX_WILD
+ : classifier_count_exact(&wx->cls) >= WX_MAX_EXACT)) {
+ /* XXX subrules should not count against exact-match limit */
+ return ENOBUFS;
+ }
+ }
+
+ rule = wx_rule_create(NULL, put->actions, put->n_actions,
+ put->idle_timeout, put->hard_timeout);
- cls_rule_from_flow(&rule->wr.cr, put->flow);
++ cls_rule_from_flow(put->flow, &rule->wr.cr);
+ wx_rule_insert(wx, rule, NULL, 0);
+
+ if (old_stats) {
+ /* XXX */
+ memset(old_stats, 0, sizeof *old_stats);
+ }
+ if (rulep) {
+ *rulep = &rule->wr;
+ }
+
+ return 0;
+}
+
+static int
+wx_flow_delete(struct wdp *wdp, struct wdp_rule *wdp_rule,
+ struct wdp_flow_stats *final_stats)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr);
+
+ wx_rule_remove(wx, rule);
+ if (final_stats) {
+ memset(final_stats, 0, sizeof *final_stats); /* XXX */
+ }
+ return 0;
+}
+
+static void
+wx_flush_rule(struct cls_rule *cls_rule, void *wx_)
+{
+ struct wx_rule *rule = wx_rule_cast(cls_rule);
+ struct wx *wx = wx_;
+
+ /* Mark the flow as not installed, even though it might really be
+ * installed, so that wx_rule_remove() doesn't bother trying to uninstall
+ * it. There is no point in uninstalling it individually since we are
+ * about to blow away all the flows with xfif_flow_flush(). */
+ rule->installed = false;
+
+ wx_rule_remove(wx, rule);
+}
+
+static int
+wx_flow_flush(struct wdp *wdp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ COVERAGE_INC(wx_flow_flush);
+ classifier_for_each(&wx->cls, CLS_INC_ALL, wx_flush_rule, wx);
+ xfif_flow_flush(wx->xfif);
+ return 0;
+}
+
+static int
+wx_execute(struct wdp *wdp, uint16_t in_port,
+ const union ofp_action actions[], int n_actions,
+ const struct ofpbuf *packet)
+{
+ struct wx *wx = wx_cast(wdp);
+ struct xflow_actions xflow_actions;
+ flow_t flow;
+ int error;
+
- flow_extract((struct ofpbuf *) packet, in_port, &flow);
++ flow_extract((struct ofpbuf *) packet, 0, in_port, &flow);
+ error = wx_xlate_actions(wx, actions, n_actions, &flow, packet,
+ &xflow_actions, NULL);
+ if (error) {
+ return error;
+ }
+ xfif_execute(wx->xfif, ofp_port_to_xflow_port(in_port),
+ xflow_actions.actions, xflow_actions.n_actions, packet);
+ return 0;
+}
+
+static int
+wx_flow_inject(struct wdp *wdp, struct wdp_rule *wdp_rule,
+ uint16_t in_port, const struct ofpbuf *packet)
+{
+ struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr);
+ int error;
+
+ error = wx_execute(wdp, in_port, rule->wr.actions, rule->wr.n_actions,
+ packet);
+ if (!error) {
+ rule->packet_count++;
+ rule->byte_count += packet->size;
+ rule->used = time_msec();
+ }
+ return error;
+}
+
+static int
+wx_recv_get_mask(const struct wdp *wdp, int *listen_mask)
+{
+ struct wx *wx = wx_cast(wdp);
+ int xflow_listen_mask;
+ int error;
+
+ error = xfif_recv_get_mask(wx->xfif, &xflow_listen_mask);
+ if (!error) {
+ *listen_mask = 0;
+ if (xflow_listen_mask & XFLOWL_MISS) {
+ *listen_mask |= 1 << WDP_CHAN_MISS;
+ }
+ if (xflow_listen_mask & XFLOWL_ACTION) {
+ *listen_mask |= 1 << WDP_CHAN_ACTION;
+ }
+ if (xflow_listen_mask & XFLOWL_SFLOW) {
+ *listen_mask |= 1 << WDP_CHAN_SFLOW;
+ }
+ }
+ return error;
+}
+
+static int
+wx_recv_set_mask(struct wdp *wdp, int listen_mask)
+{
+ struct wx *wx = wx_cast(wdp);
+ int xflow_listen_mask;
+
+ xflow_listen_mask = 0;
+ if (listen_mask & (1 << WDP_CHAN_MISS)) {
+ xflow_listen_mask |= XFLOWL_MISS;
+ }
+ if (listen_mask & (1 << WDP_CHAN_ACTION)) {
+ xflow_listen_mask |= XFLOWL_ACTION;
+ }
+ if (listen_mask & (1 << WDP_CHAN_SFLOW)) {
+ xflow_listen_mask |= XFLOWL_SFLOW;
+ }
+
+ return xfif_recv_set_mask(wx->xfif, xflow_listen_mask);
+}
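+
+/* For example, listen_mask = (1 << WDP_CHAN_MISS) | (1 << WDP_CHAN_ACTION)
+ * maps to XFLOWL_MISS | XFLOWL_ACTION; wx_recv_get_mask() above performs
+ * the inverse translation. */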
+
+static int
+wx_get_sflow_probability(const struct wdp *wdp, uint32_t *probability)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_get_sflow_probability(wx->xfif, probability);
+}
+
+static int
+wx_set_sflow_probability(struct wdp *wdp, uint32_t probability)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ return xfif_set_sflow_probability(wx->xfif, probability);
+}
+
+static int
+wx_translate_xflow_msg(struct xflow_msg *msg, struct ofpbuf *payload,
+ struct wdp_packet *packet)
+{
+ packet->in_port = xflow_port_to_ofp_port(msg->port);
+ packet->send_len = 0;
++ packet->tun_id = 0;
+
+ switch (msg->type) {
+ case _XFLOWL_MISS_NR:
+ packet->channel = WDP_CHAN_MISS;
+ packet->payload = payload;
++ packet->tun_id = msg->arg;
+ return 0;
+
+ case _XFLOWL_ACTION_NR:
+ packet->channel = WDP_CHAN_ACTION;
+ packet->payload = payload;
+ packet->send_len = msg->arg;
+ return 0;
+
+ case _XFLOWL_SFLOW_NR:
+ /* XXX */
+ ofpbuf_delete(payload);
+ return ENOSYS;
+
+ default:
+ VLOG_WARN_RL(&rl, "received XFLOW message of unexpected type %"PRIu32,
+ msg->type);
+ ofpbuf_delete(payload);
+ return ENOSYS;
+ }
+}
+
+static const uint8_t *
+get_local_mac(const struct wx *wx)
+{
+ const struct wdp_port *port = port_array_get(&wx->ports, XFLOWP_LOCAL);
+ return port ? port->opp.hw_addr : NULL;
+}
+
+/* Returns true if 'packet' is a DHCP reply to the local port. Such a reply
+ * should be sent to the local port regardless of the flow table.
+ *
+ * We only need this, strictly speaking, when in-band control is turned on. */
+static bool
+wx_is_local_dhcp_reply(const struct wx *wx,
+ const flow_t *flow, const struct ofpbuf *packet)
+{
+ if (flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IP_TYPE_UDP
+ && flow->tp_src == htons(DHCP_SERVER_PORT)
+ && flow->tp_dst == htons(DHCP_CLIENT_PORT)
+ && packet->l7)
+ {
+ const uint8_t *local_mac = get_local_mac(wx);
+ struct dhcp_header *dhcp = ofpbuf_at(
+ packet, (char *)packet->l7 - (char *)packet->data, sizeof *dhcp);
+ return dhcp && local_mac && eth_addr_equals(dhcp->chaddr, local_mac);
+ }
+
+ return false;
+}
+
+static bool
+wx_explode_rule(struct wx *wx, struct xflow_msg *msg, struct ofpbuf *payload)
+{
+ struct wx_rule *rule;
+ flow_t flow;
+
++ flow_extract(payload, 0, xflow_port_to_ofp_port(msg->port), &flow);
+
+ if (wx_is_local_dhcp_reply(wx, &flow, payload)) {
+ union xflow_action action;
+
+ memset(&action, 0, sizeof(action));
+ action.output.type = XFLOWAT_OUTPUT;
+ action.output.port = XFLOWP_LOCAL;
+ xfif_execute(wx->xfif, msg->port, &action, 1, payload);
+ }
+
+ rule = wx_rule_lookup_valid(wx, &flow);
+ if (!rule) {
+ return false;
+ }
+
+ if (rule->wr.cr.flow.wildcards) {
+ rule = wx_rule_create_subrule(wx, rule, &flow);
+ wx_rule_make_actions(wx, rule, payload);
+ } else {
+ if (!rule->may_install) {
+ /* The rule is not installable, that is, we need to process every
+ * packet, so process the current packet and set its actions into
+ * 'rule'. */
+ wx_rule_make_actions(wx, rule, payload);
+ } else {
+ /* XXX revalidate rule if it needs it */
+ }
+ }
+
+ wx_rule_execute(wx, rule, payload, &flow);
+ wx_rule_reinstall(wx, rule);
+
+ return true;
+}
+
+static int
+wx_recv(struct wdp *wdp, struct wdp_packet *packet)
+{
+ struct wx *wx = wx_cast(wdp);
+ int i;
+
+ /* XXX need to avoid 50*50 potential cost for caller. */
+ for (i = 0; i < 50; i++) {
+ struct xflow_msg *msg;
+ struct ofpbuf *buf;
+ int error;
+
+ error = xfif_recv(wx->xfif, &buf);
+ if (error) {
+ return error;
+ }
+
+ msg = ofpbuf_pull(buf, sizeof *msg);
+ if (msg->type != _XFLOWL_MISS_NR || !wx_explode_rule(wx, msg, buf)) {
+ return wx_translate_xflow_msg(msg, buf, packet);
+ }
+ ofpbuf_delete(buf);
+ }
+ return EAGAIN;
+}
+
+static void
+wx_recv_wait(struct wdp *wdp)
+{
+ struct wx *wx = wx_cast(wdp);
+
+ xfif_recv_wait(wx->xfif);
+}
+\f
+static void wx_port_update(struct wx *, const char *devname);
+static void wx_port_reinit(struct wx *);
+
+static void
+wx_port_process_change(struct wx *wx, int error, char *devname)
+{
+ if (error == ENOBUFS) {
+ wx_port_reinit(wx);
+ } else if (!error) {
+ wx_port_update(wx, devname);
+ free(devname);
+ }
+}
+
+static void
+wx_port_run(struct wx *wx)
+{
+ char *devname;
+ int error;
+
+ while ((error = xfif_port_poll(wx->xfif, &devname)) != EAGAIN) {
+ wx_port_process_change(wx, error, devname);
+ }
+ while ((error = netdev_monitor_poll(wx->netdev_monitor,
+ &devname)) != EAGAIN) {
+ wx_port_process_change(wx, error, devname);
+ }
+}
+
+static size_t
+wx_port_refresh_group(struct wx *wx, unsigned int group)
+{
+ uint16_t *ports;
+ size_t n_ports;
+ struct wdp_port *port;
+ unsigned int port_no;
+
+ assert(group == WX_GROUP_ALL || group == WX_GROUP_FLOOD);
+
+ ports = xmalloc(port_array_count(&wx->ports) * sizeof *ports);
+ n_ports = 0;
+ PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
+ if (group == WX_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) {
+ ports[n_ports++] = port_no;
+ }
+ }
+ xfif_port_group_set(wx->xfif, group, ports, n_ports);
+ free(ports);
+
+ return n_ports;
+}
+
+static void
+wx_port_refresh_groups(struct wx *wx)
+{
+ wx_port_refresh_group(wx, WX_GROUP_FLOOD);
+ wx_port_refresh_group(wx, WX_GROUP_ALL);
+}
+
+static void
+wx_port_reinit(struct wx *wx)
+{
+ struct svec devnames;
+ struct wdp_port *wdp_port;
+ unsigned int port_no;
+ struct xflow_port *xflow_ports;
+ size_t n_xflow_ports;
+ size_t i;
+
+ svec_init(&devnames);
+ PORT_ARRAY_FOR_EACH (wdp_port, &wx->ports, port_no) {
+ svec_add(&devnames, (char *) wdp_port->opp.name);
+ }
+ xfif_port_list(wx->xfif, &xflow_ports, &n_xflow_ports);
+ for (i = 0; i < n_xflow_ports; i++) {
+ svec_add(&devnames, xflow_ports[i].devname);
+ }
+ free(xflow_ports);
+
+ svec_sort_unique(&devnames);
+ for (i = 0; i < devnames.n; i++) {
+ wx_port_update(wx, devnames.names[i]);
+ }
+ svec_destroy(&devnames);
+
+ wx_port_refresh_groups(wx);
+}
+
+static struct wdp_port *
+make_wdp_port(const struct xflow_port *xflow_port)
+{
+ struct netdev_options netdev_options;
+ enum netdev_flags flags;
+ struct wdp_port *wdp_port;
+ struct netdev *netdev;
+ bool carrier;
+ int error;
+
+ memset(&netdev_options, 0, sizeof netdev_options);
+ netdev_options.name = xflow_port->devname;
+ netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
+ netdev_options.may_create = true;
+ netdev_options.may_open = true;
+
+ error = netdev_open(&netdev_options, &netdev);
+ if (error) {
+ VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s "
+ "cannot be opened (%s)",
+ xflow_port->devname, xflow_port->port,
+ xflow_port->devname, strerror(error));
+ return NULL;
+ }
+
+ wdp_port = xmalloc(sizeof *wdp_port);
+ wdp_port->netdev = netdev;
+ wdp_port->opp.port_no = xflow_port_to_ofp_port(xflow_port->port);
+ netdev_get_etheraddr(netdev, wdp_port->opp.hw_addr);
+ strncpy((char *) wdp_port->opp.name, xflow_port->devname,
+ sizeof wdp_port->opp.name);
+ wdp_port->opp.name[sizeof wdp_port->opp.name - 1] = '\0';
+
+ netdev_get_flags(netdev, &flags);
+ wdp_port->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN;
+
+ netdev_get_carrier(netdev, &carrier);
+ wdp_port->opp.state = carrier ? 0 : OFPPS_LINK_DOWN;
+
+ netdev_get_features(netdev,
+ &wdp_port->opp.curr, &wdp_port->opp.advertised,
+ &wdp_port->opp.supported, &wdp_port->opp.peer);
+
+ wdp_port->devname = xstrdup(xflow_port->devname);
+ wdp_port->internal = (xflow_port->flags & XFLOW_PORT_INTERNAL) != 0;
+ return wdp_port;
+}
+
+static bool
+wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port)
+{
+ if (port_array_get(&wx->ports, xflow_port->port)) {
+ VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
+ xflow_port->port);
+ return true;
+ } else if (shash_find(&wx->port_by_name, xflow_port->devname)) {
+ VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath",
+ xflow_port->devname);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static int
+wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_)
+{
+ const struct ofp_phy_port *a = &a_->opp;
+ const struct ofp_phy_port *b = &b_->opp;
+
+ BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. */
+ return (a->port_no == b->port_no
+ && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr)
+ && !strcmp((char *) a->name, (char *) b->name)
+ && a->state == b->state
+ && a->config == b->config
+ && a->curr == b->curr
+ && a->advertised == b->advertised
+ && a->supported == b->supported
+ && a->peer == b->peer);
+}
+
+static void
+wx_port_install(struct wx *wx, struct wdp_port *wdp_port)
+{
+ uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no);
+ const char *netdev_name = (const char *) wdp_port->opp.name;
+
+ netdev_monitor_add(wx->netdev_monitor, wdp_port->netdev);
+ port_array_set(&wx->ports, xflow_port, wdp_port);
+ shash_add(&wx->port_by_name, netdev_name, wdp_port);
+}
+
+static void
+wx_port_remove(struct wx *wx, struct wdp_port *wdp_port)
+{
+ uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no);
+
+ netdev_monitor_remove(wx->netdev_monitor, wdp_port->netdev);
+ port_array_set(&wx->ports, xflow_port, NULL);
+ shash_delete(&wx->port_by_name,
+ shash_find(&wx->port_by_name, (char *) wdp_port->opp.name));
+}
+
+static void
+wx_port_free(struct wdp_port *wdp_port)
+{
+ if (wdp_port) {
+ netdev_close(wdp_port->netdev);
+ free(wdp_port);
+ }
+}
+
+static void
+wx_port_update(struct wx *wx, const char *devname)
+{
+ struct xflow_port xflow_port;
+ struct wdp_port *old_wdp_port;
+ struct wdp_port *new_wdp_port;
+ int error;
+
+ COVERAGE_INC(wx_update_port);
+
+ /* Query the datapath for port information. */
+ error = xfif_port_query_by_name(wx->xfif, devname, &xflow_port);
+
+ /* Find the old wdp_port. */
+ old_wdp_port = shash_find_data(&wx->port_by_name, devname);
+ if (!error) {
+ if (!old_wdp_port) {
+ /* There's no port named 'devname' but there might be a port with
+ * the same port number. This could happen if a port is deleted
+ * and then a new one added in its place very quickly, or if a port
+ * is renamed. In the former case we want to send an OFPPR_DELETE
+ * and an OFPPR_ADD, and in the latter case we want to send a
+ * single OFPPR_MODIFY. We can distinguish the cases by comparing
+ * the old port's ifindex against the new port, or perhaps less
+ * reliably but more portably by comparing the old port's MAC
+ * against the new port's MAC. However, this code isn't that smart
+ * and always sends an OFPPR_MODIFY (XXX). */
+ old_wdp_port = port_array_get(&wx->ports, xflow_port.port);
+ }
+ } else if (error != ENOENT && error != ENODEV) {
+ VLOG_WARN_RL(&rl, "xfif_port_query_by_name returned unexpected error "
+ "%s", strerror(error));
+ return;
+ }
+
+ /* Create a new wdp_port. */
+ new_wdp_port = !error ? make_wdp_port(&xflow_port) : NULL;
+
+ /* Eliminate a few pathological cases. */
+ if (!old_wdp_port && !new_wdp_port) {
+ return;
+ } else if (old_wdp_port && new_wdp_port) {
+ /* Most of the 'config' bits are OpenFlow soft state, but
+ * OFPPC_PORT_DOWN is maintained by the kernel. So transfer the
+ * OpenFlow bits from old_wdp_port. (make_wdp_port() only sets
+ * OFPPC_PORT_DOWN and leaves the other bits 0.) */
+ new_wdp_port->opp.config |= old_wdp_port->opp.config & ~OFPPC_PORT_DOWN;
+
+ if (wdp_port_equal(old_wdp_port, new_wdp_port)) {
+ /* False alarm--no change. */
+ wx_port_free(new_wdp_port);
+ return;
+ }
+ }
+
+ /* Now deal with the normal cases. */
+ if (old_wdp_port) {
+ wx_port_remove(wx, old_wdp_port);
+ }
+ if (new_wdp_port) {
+ wx_port_install(wx, new_wdp_port);
+ }
+ wx_port_free(old_wdp_port);
+}
+
+static int
+wx_port_init(struct wx *wx)
+{
+ struct xflow_port *ports;
+ size_t n_ports;
+ size_t i;
+ int error;
+
+ error = xfif_port_list(wx->xfif, &ports, &n_ports);
+ if (error) {
+ return error;
+ }
+
+ for (i = 0; i < n_ports; i++) {
+ const struct xflow_port *xflow_port = &ports[i];
+ if (!wx_port_conflicts(wx, xflow_port)) {
+ struct wdp_port *wdp_port = make_wdp_port(xflow_port);
+ if (wdp_port) {
+ wx_port_install(wx, wdp_port);
+ }
+ }
+ }
+ free(ports);
+ wx_port_refresh_groups(wx);
+ return 0;
+}
+\f
+void
+wdp_xflow_register(void)
+{
+ static const struct wdp_class wdp_xflow_class = {
+ NULL, /* name */
+ wx_run,
+ wx_wait,
+ wx_enumerate,
+ wx_open,
+ wx_close,
+ wx_get_all_names,
+ wx_destroy,
+ wx_get_features,
+ wx_get_stats,
+ wx_get_drop_frags,
+ wx_set_drop_frags,
+ wx_port_add,
+ wx_port_del,
+ wx_port_query_by_number,
+ wx_port_query_by_name,
+ wx_port_list,
+ wx_port_set_config,
+ wx_port_poll,
+ wx_port_poll_wait,
+ wx_flow_get,
+ wx_flow_match,
+ wx_flow_for_each_match,
+ wx_flow_get_stats,
+ wx_flow_overlaps,
+ wx_flow_put,
+ wx_flow_delete,
+ wx_flow_flush,
+ wx_flow_inject,
+ wx_execute,
+ wx_recv_get_mask,
+ wx_recv_set_mask,
+ wx_get_sflow_probability,
+ wx_set_sflow_probability,
+ wx_recv,
+ wx_recv_wait,
+ };
+
+ static bool inited = false;
+
+ struct svec types;
+ const char *type;
+ bool registered;
+ int i;
+
+ if (inited) {
+ return;
+ }
+ inited = true;
+
+ svec_init(&types);
+ xf_enumerate_types(&types);
+
+ registered = false;
+ SVEC_FOR_EACH (i, type, &types) {
+ struct wdp_class *class;
+
+ class = xmalloc(sizeof *class);
+ *class = wdp_xflow_class;
+ class->type = xstrdup(type);
+ if (registered) {
+ class->run = NULL;
+ class->wait = NULL;
+ }
+ if (!wdp_register_provider(class)) {
+ registered = true;
+ }
+ }
+
+ svec_destroy(&types);
+}
--- /dev/null
+/*
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "wdp-provider.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "coverage.h"
+#include "dynamic-string.h"
+#include "flow.h"
+#include "netdev.h"
+#include "netlink.h"
+#include "ofp-print.h"
+#include "ofpbuf.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "shash.h"
+#include "svec.h"
+#include "timeval.h"
+#include "util.h"
+#include "valgrind.h"
+#include "wdp-xflow.h"
+
+#include "vlog.h"
+#define THIS_MODULE VLM_wdp
+\f
+/* wdp_rule */
+
+/* Initializes a new 'struct wdp_rule', copying in the 'n_actions' elements of
+ * 'actions'.
+ *
+ * The caller is responsible for initializing 'rule->cr'. */
+void
+wdp_rule_init(struct wdp_rule *rule, const union ofp_action *actions,
+ size_t n_actions)
+{
+ rule->actions = xmemdup(actions, n_actions * sizeof *actions);
+ rule->n_actions = n_actions;
+ rule->created = time_msec();
+ rule->idle_timeout = 0;
+ rule->hard_timeout = 0;
+ rule->client_data = NULL;
+}
+
+/* Frees the data in 'rule'. */
+void
+wdp_rule_uninit(struct wdp_rule *rule)
+{
+ free(rule->actions);
+}
+\f
+/* wdp */
+
+static const struct wdp_class *base_wdp_classes[] = {
+ /* XXX none yet */
+};
+
+struct registered_wdp_class {
+ const struct wdp_class *wdp_class;
+ int refcount;
+};
+
+static struct shash wdp_classes = SHASH_INITIALIZER(&wdp_classes);
+
+/* Rate limit for individual messages going to or from the datapath, output at
+ * DBG level. This is very high because, if these are enabled, it is because
+ * we really need to see them. */
+static struct vlog_rate_limit wdpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
+
+/* Not really much point in logging many wdp errors. */
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
+
+static void log_operation(const struct wdp *, const char *operation,
+ int error);
+
+static void
+wdp_initialize(void)
+{
+ static int status = -1;
+
+ if (status < 0) {
+ int i;
+
+ status = 0;
+ for (i = 0; i < ARRAY_SIZE(base_wdp_classes); i++) {
+ wdp_register_provider(base_wdp_classes[i]);
+ }
+ wdp_xflow_register();
+ }
+}
+
+/* Performs periodic work needed by all the various kinds of wdps.
+ *
+ * If your program opens any wdps, it must call both this function and
+ * netdev_run() within its main poll loop. */
+void
+wdp_run(void)
+{
+ struct shash_node *node;
+ SHASH_FOR_EACH (node, &wdp_classes) {
+ const struct registered_wdp_class *registered_class = node->data;
+ if (registered_class->wdp_class->run) {
+ registered_class->wdp_class->run();
+ }
+ }
+}
+
+/* Arranges for poll_block() to wake up when wdp_run() needs to be called.
+ *
+ * If your program opens any wdps, it must call both this function and
+ * netdev_wait() within its main poll loop. */
+void
+wdp_wait(void)
+{
+ struct shash_node *node;
+ SHASH_FOR_EACH(node, &wdp_classes) {
+ const struct registered_wdp_class *registered_class = node->data;
+ if (registered_class->wdp_class->wait) {
+ registered_class->wdp_class->wait();
+ }
+ }
+}
+
+/* Registers a new datapath provider. After successful registration, new
+ * datapaths of that type can be opened using wdp_open(). */
+int
+wdp_register_provider(const struct wdp_class *new_class)
+{
+ struct registered_wdp_class *registered_class;
+
+ if (shash_find(&wdp_classes, new_class->type)) {
+ VLOG_WARN("attempted to register duplicate datapath provider: %s",
+ new_class->type);
+ return EEXIST;
+ }
+
+ registered_class = xmalloc(sizeof *registered_class);
+ registered_class->wdp_class = new_class;
+ registered_class->refcount = 0;
+
+ shash_add(&wdp_classes, new_class->type, registered_class);
+
+ return 0;
+}
+
+/* Unregisters a datapath provider. 'type' must have been previously
+ * registered and not currently be in use by any wdps. After unregistration
+ * new datapaths of that type cannot be opened using wdp_open(). */
+int
+wdp_unregister_provider(const char *type)
+{
+ struct shash_node *node;
+ struct registered_wdp_class *registered_class;
+
+ node = shash_find(&wdp_classes, type);
+ if (!node) {
+ VLOG_WARN("attempted to unregister a datapath provider that is not "
+ "registered: %s", type);
+ return EAFNOSUPPORT;
+ }
+
+ registered_class = node->data;
+ if (registered_class->refcount) {
+ VLOG_WARN("attempted to unregister in use datapath provider: %s",
+ type);
+ return EBUSY;
+ }
+
+ shash_delete(&wdp_classes, node);
+ free(registered_class);
+
+ return 0;
+}
+
+/* Clears 'types' and enumerates the types of all currently registered wdp
+ * providers into it. The caller must first initialize the svec. */
+void
+wdp_enumerate_types(struct svec *types)
+{
+ struct shash_node *node;
+
+ wdp_initialize();
+ svec_clear(types);
+
+ SHASH_FOR_EACH (node, &wdp_classes) {
+ const struct registered_wdp_class *registered_class = node->data;
+ svec_add(types, registered_class->wdp_class->type);
+ }
+}
+
+/* Clears 'names' and enumerates the names of all known created datapaths
+ * with the given 'type'. The caller must first initialize the svec. Returns 0
+ * if successful, otherwise a positive errno value.
+ *
+ * Some kinds of datapaths might not be practically enumerable. This is not
+ * considered an error. */
+int
+wdp_enumerate_names(const char *type, struct svec *names)
+{
+ const struct registered_wdp_class *registered_class;
+ const struct wdp_class *wdp_class;
+ int error;
+
+ wdp_initialize();
+ svec_clear(names);
+
+ registered_class = shash_find_data(&wdp_classes, type);
+ if (!registered_class) {
+ VLOG_WARN("could not enumerate unknown type: %s", type);
+ return EAFNOSUPPORT;
+ }
+
+ wdp_class = registered_class->wdp_class;
+ error = (wdp_class->enumerate
+ ? wdp_class->enumerate(wdp_class, names)
+ : 0);
+
+ if (error) {
+ VLOG_WARN("failed to enumerate %s datapaths: %s", wdp_class->type,
+ strerror(error));
+ }
+
+ return error;
+}
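+
+/* Example (illustrative sketch, not part of this patch's API changes): a
+ * client might list every registered provider type and the datapaths of
+ * each type like this, assuming <stdio.h> for printf():
+ *
+ *     struct svec types, names;
+ *     const char *type;
+ *     int i;
+ *
+ *     svec_init(&types);
+ *     svec_init(&names);
+ *     wdp_enumerate_types(&types);
+ *     SVEC_FOR_EACH (i, type, &types) {
+ *         if (!wdp_enumerate_names(type, &names)) {
+ *             size_t j;
+ *             for (j = 0; j < names.n; j++) {
+ *                 printf("%s@%s\n", type, names.names[j]);
+ *             }
+ *         }
+ *     }
+ *     svec_destroy(&names);
+ *     svec_destroy(&types);
+ */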
+
+/* Parses 'datapath_name', which is of the form type@name, into its
+ * component pieces. 'name' and 'type' must be freed by the caller. */
+void
+wdp_parse_name(const char *datapath_name_, char **name, char **type)
+{
+ char *datapath_name = xstrdup(datapath_name_);
+ char *separator;
+
+ separator = strchr(datapath_name, '@');
+ if (separator) {
+ *separator = '\0';
+ *type = datapath_name;
+ *name = xstrdup(separator + 1);
+ } else {
+ *name = datapath_name;
+ *type = NULL;
+ }
+}
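+
+/* Example (illustrative): wdp_parse_name("system@br0", ...) stores "br0" in
+ * '*name' and "system" in '*type'; plain "br0" yields a NULL '*type'
+ * (free(NULL) is harmless):
+ *
+ *     char *name, *type;
+ *
+ *     wdp_parse_name("system@br0", &name, &type);
+ *     assert(!strcmp(name, "br0") && !strcmp(type, "system"));
+ *     free(name);
+ *     free(type);
+ */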
+
+static int
+do_open(const char *name, const char *type, bool create, struct wdp **wdpp)
+{
+ struct wdp *wdp = NULL;
+ int error;
+ struct registered_wdp_class *registered_class;
+
+ wdp_initialize();
+
+ if (!type || *type == '\0') {
+ type = "system";
+ }
+
+ registered_class = shash_find_data(&wdp_classes, type);
+ if (!registered_class) {
+ VLOG_WARN("could not create datapath %s of unknown type %s", name,
+ type);
+ error = EAFNOSUPPORT;
+ goto exit;
+ }
+
+ error = registered_class->wdp_class->open(registered_class->wdp_class,
+ name, create, &wdp);
+ if (!error) {
+ registered_class->refcount++;
+ }
+
+exit:
+ *wdpp = error ? NULL : wdp;
+ return error;
+}
+
+/* Tries to open an existing datapath named 'name' and type 'type'. Will fail
+ * if no datapath with 'name' and 'type' exists. 'type' may be either NULL or
+ * the empty string to specify the default system type. Returns 0 if
+ * successful, otherwise a positive errno value. On success stores a pointer
+ * to the datapath in '*wdpp', otherwise a null pointer. */
+int
+wdp_open(const char *name, const char *type, struct wdp **wdpp)
+{
+ return do_open(name, type, false, wdpp);
+}
+
+/* Tries to create and open a new datapath with the given 'name' and 'type'.
+ * 'type' may be either NULL or the empty string to specify the default system
+ * type. Will fail if a datapath with 'name' and 'type' already exists.
+ * Returns 0 if successful, otherwise a positive errno value. On success
+ * stores a pointer to the datapath in '*wdpp', otherwise a null pointer. */
+int
+wdp_create(const char *name, const char *type, struct wdp **wdpp)
+{
+ return do_open(name, type, true, wdpp);
+}
+
+/* Tries to open a datapath with the given 'name' and 'type', creating it if it
+ * does not exist. 'type' may be either NULL or the empty string to specify
+ * the default system type. Returns 0 if successful, otherwise a positive
+ * errno value. On success stores a pointer to the datapath in '*wdpp',
+ * otherwise a null pointer. */
+int
+wdp_create_and_open(const char *name, const char *type, struct wdp **wdpp)
+{
+ int error;
+
+ error = wdp_create(name, type, wdpp);
+ if (error == EEXIST || error == EBUSY) {
+ error = wdp_open(name, type, wdpp);
+ if (error) {
+ VLOG_WARN("datapath %s already exists but cannot be opened: %s",
+ name, strerror(error));
+ }
+ } else if (error) {
+ VLOG_WARN("failed to create datapath %s: %s", name, strerror(error));
+ }
+ return error;
+}
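+
+/* Example (illustrative): typical open-or-create usage with the default
+ * "system" provider type:
+ *
+ *     struct wdp *wdp;
+ *     int error;
+ *
+ *     error = wdp_create_and_open("br0", NULL, &wdp);
+ *     if (error) {
+ *         ovs_fatal(error, "could not open datapath br0");
+ *     }
+ *     ...use 'wdp'...
+ *     wdp_close(wdp);
+ */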
+
+/* Closes and frees the connection to 'wdp'. Does not destroy the wdp
+ * itself; call wdp_delete() first, instead, if that is desirable. */
+void
+wdp_close(struct wdp *wdp)
+{
+ if (wdp) {
+ struct registered_wdp_class *registered_class;
+
+ registered_class = shash_find_data(&wdp_classes,
+ wdp->wdp_class->type);
+ assert(registered_class);
+ assert(registered_class->refcount);
+
+ registered_class->refcount--;
+ wdp_uninit(wdp, true);
+ }
+}
+
+/* Returns the name of datapath 'wdp' prefixed with the type
+ * (for use in log messages). */
+const char *
+wdp_name(const struct wdp *wdp)
+{
+ return wdp->full_name;
+}
+
+/* Returns the name of datapath 'wdp' without the type
+ * (for use in device names). */
+const char *
+wdp_base_name(const struct wdp *wdp)
+{
+ return wdp->base_name;
+}
+
+/* Enumerates all names that may be used to open 'wdp' into 'all_names'. The
+ * Linux datapath, for example, supports opening a datapath both by number,
+ * e.g. "wdp0", and by the name of the datapath's local port. For some
+ * datapaths, this might be an infinite set (e.g. in a file name, slashes may
+ * be duplicated any number of times), in which case only the names most likely
+ * to be used will be enumerated.
+ *
+ * The caller must already have initialized 'all_names'. Any existing names in
+ * 'all_names' will not be disturbed. */
+int
+wdp_get_all_names(const struct wdp *wdp, struct svec *all_names)
+{
+ if (wdp->wdp_class->get_all_names) {
+ int error = wdp->wdp_class->get_all_names(wdp, all_names);
+ if (error) {
+ VLOG_WARN_RL(&error_rl,
+ "failed to retrieve names for datpath %s: %s",
+ wdp_name(wdp), strerror(error));
+ }
+ return error;
+ } else {
+ svec_add(all_names, wdp_base_name(wdp));
+ return 0;
+ }
+}
+
+/* Destroys the datapath that 'wdp' is connected to, first removing all of
+ * its ports. After calling this function, it does not make sense to pass
+ * 'wdp' to any functions other than wdp_name() or wdp_close(). */
+int
+wdp_delete(struct wdp *wdp)
+{
+ int error;
+
+ COVERAGE_INC(wdp_destroy);
+
+ error = wdp->wdp_class->destroy(wdp);
+ log_operation(wdp, "delete", error);
+ return error;
+}
+
+/* Obtains the set of features supported by 'wdp'.
+ *
+ * If successful, returns 0 and stores in '*featuresp' a newly allocated
+ * "struct ofp_switch_features" that describes the features and ports supported
+ * by 'wdp'. The caller is responsible for initializing the header,
+ * datapath_id, and n_buffers members of the returned "struct
+ * ofp_switch_features". The caller must free the returned buffer (with
+ * ofpbuf_delete()) when it is no longer needed.
+ *
+ * On error, returns an OpenFlow error code (as constructed by ofp_mkerr()) and
+ * sets '*featuresp' to NULL. */
+int
+wdp_get_features(const struct wdp *wdp, struct ofpbuf **featuresp)
+{
+ int error = wdp->wdp_class->get_features(wdp, featuresp);
+ if (error) {
+ *featuresp = NULL;
+ }
+ return error;
+}
+
+/* Retrieves statistics for 'wdp' into 'stats'. Returns 0 if successful,
+ * otherwise a positive errno value. On error, clears 'stats' to
+ * all-bits-zero. */
+int
+wdp_get_wdp_stats(const struct wdp *wdp, struct wdp_stats *stats)
+{
+ int error = wdp->wdp_class->get_stats(wdp, stats);
+ if (error) {
+ memset(stats, 0, sizeof *stats);
+ }
+ log_operation(wdp, "get_stats", error);
+ return error;
+}
+
+/* Retrieves the current IP fragment handling policy for 'wdp' into
+ * '*drop_frags': true indicates that fragments are dropped, false indicates
+ * that fragments are treated in the same way as other IP packets (except that
+ * the L4 header cannot be read). Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+wdp_get_drop_frags(const struct wdp *wdp, bool *drop_frags)
+{
+ int error = wdp->wdp_class->get_drop_frags(wdp, drop_frags);
+ if (error) {
+ *drop_frags = false;
+ }
+ log_operation(wdp, "get_drop_frags", error);
+ return error;
+}
+
+/* Changes 'wdp''s treatment of IP fragments to 'drop_frags', whose meaning is
+ * the same as for the get_drop_frags member function. Returns 0 if
+ * successful, otherwise a positive errno value. EOPNOTSUPP indicates that
+ * 'wdp''s fragment dropping policy is not configurable. */
+int
+wdp_set_drop_frags(struct wdp *wdp, bool drop_frags)
+{
+ int error;
+ error = (wdp->wdp_class->set_drop_frags
+ ? wdp->wdp_class->set_drop_frags(wdp, drop_frags)
+ : EOPNOTSUPP);
+ log_operation(wdp, "set_drop_frags", error);
+ return error;
+}
+
+/* Clears the contents of 'port'. */
+void
+wdp_port_clear(struct wdp_port *port)
+{
+ memset(port, 0, sizeof *port);
+}
+
+/* Makes a deep copy of 'old' in 'port'. The caller may free 'port''s data
+ * with wdp_port_free(). */
+void
+wdp_port_copy(struct wdp_port *port, const struct wdp_port *old)
+{
+ port->netdev = old->netdev ? netdev_reopen(old->netdev) : NULL;
+ port->opp = old->opp;
+ port->devname = old->devname ? xstrdup(old->devname) : NULL;
+ port->internal = old->internal;
+}
+
+/* Frees the data that 'port' points to (but not 'port' itself). */
+void
+wdp_port_free(struct wdp_port *port)
+{
+ if (port) {
+ netdev_close(port->netdev);
+ free(port->devname);
+ }
+}
+
+/* Frees the data that each of the 'n' ports in 'ports' points to, and then
+ * frees 'ports' itself. */
+void
+wdp_port_array_free(struct wdp_port *ports, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ wdp_port_free(&ports[i]);
+ }
+ free(ports);
+}
+
+/* Attempts to add 'devname' as a port on 'wdp':
+ *
+ * - If 'internal' is true, attempts to create a new internal port (a virtual
+ * port implemented in software) by that name.
+ *
+ * - If 'internal' is false, 'devname' must name an existing network device.
+ *
+ * If successful, returns 0 and sets '*port_nop' to the new port's OpenFlow
+ * port number (if 'port_nop' is non-null). On failure, returns a positive
+ * errno value and sets '*port_nop' to OFPP_NONE (if 'port_nop' is non-null).
+ *
+ * Some wildcarded datapaths might have fixed sets of ports. For these
+ * datapaths this function will always fail.
+ *
+ * Possible error return values include:
+ *
+ * - ENODEV: No device named 'devname' exists (if 'internal' is false).
+ *
+ * - EEXIST: A device named 'devname' already exists (if 'internal' is true).
+ *
+ * - EINVAL: Device 'devname' is not supported as part of a datapath (e.g. it
+ * is not an Ethernet device), or 'devname' is too long for a network
+ * device name (if 'internal' is true)
+ *
+ * - EFBIG: The datapath already has as many ports as it can support.
+ *
+ * - EOPNOTSUPP: 'wdp' has a fixed set of ports.
+ */
+int
+wdp_port_add(struct wdp *wdp, const char *devname,
+ bool internal, uint16_t *port_nop)
+{
+ uint16_t port_no;
+ int error;
+
+ COVERAGE_INC(wdp_port_add);
+
+ error = (wdp->wdp_class->port_add
+ ? wdp->wdp_class->port_add(wdp, devname, internal, &port_no)
+ : EOPNOTSUPP);
+ if (!error) {
+ VLOG_DBG_RL(&wdpmsg_rl, "%s: added %s as port %"PRIu16,
+ wdp_name(wdp), devname, port_no);
+ } else {
+ VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
+ wdp_name(wdp), devname, strerror(error));
+ port_no = OFPP_NONE;
+ }
+ if (port_nop) {
+ *port_nop = port_no;
+ }
+ return error;
+}
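+
+/* Example (illustrative, 'wdp' is an open datapath): adding an existing
+ * device as a port and reporting the OpenFlow port number assigned to it:
+ *
+ *     uint16_t port_no;
+ *     int error;
+ *
+ *     error = wdp_port_add(wdp, "eth1", false, &port_no);
+ *     if (!error) {
+ *         VLOG_INFO("eth1 is now OpenFlow port %"PRIu16, port_no);
+ *     } else {
+ *         VLOG_ERR("failed to add eth1 (%s)", strerror(error));
+ *     }
+ */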
+
+/* Attempts to remove 'wdp''s port numbered 'port_no'. Returns 0 if
+ * successful, otherwise a positive errno value.
+ *
+ * Some wildcarded datapaths might have fixed sets of ports. For these
+ * datapaths this function will always fail.
+ *
+ * Possible error return values include:
+ *
+ * - EINVAL: 'port_no' is outside the valid range, or this particular port is
+ * not removable (e.g. it is the local port).
+ *
+ * - ENOENT: 'wdp' currently has no port numbered 'port_no'.
+ *
+ * - EOPNOTSUPP: 'wdp' has a fixed set of ports.
+ */
+int
+wdp_port_del(struct wdp *wdp, uint16_t port_no)
+{
+ int error;
+
+ COVERAGE_INC(wdp_port_del);
+
+ error = (wdp->wdp_class->port_del
+ ? wdp->wdp_class->port_del(wdp, port_no)
+ : EOPNOTSUPP);
+ log_operation(wdp, "port_del", error);
+ return error;
+}
+
+/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and
+ * initializes 'port' with port details. On failure, returns a positive errno
+ * value and clears the contents of 'port' (with wdp_port_clear()).
+ *
+ * On success, the caller owns the data in 'port' and must eventually free
+ * it (with wdp_port_free()), as wdp_port_get_name() below does.
+ *
+ * Possible error return values include:
+ *
+ * - EINVAL: 'port_no' is outside the valid range.
+ *
+ * - ENOENT: 'wdp' currently has no port numbered 'port_no'.
+ */
+int
+wdp_port_query_by_number(const struct wdp *wdp, uint16_t port_no,
+ struct wdp_port *port)
+{
+ int error;
+
+ error = wdp->wdp_class->port_query_by_number(wdp, port_no, port);
+ if (!error) {
+ VLOG_DBG_RL(&wdpmsg_rl, "%s: port %"PRIu16" is device %s",
+ wdp_name(wdp), port_no, port->devname);
+ } else {
+ wdp_port_clear(port);
+ VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s",
+ wdp_name(wdp), port_no, strerror(error));
+ }
+ return error;
+}
+
+/* Same as wdp_port_query_by_number() except that it looks for a port named
+ * 'devname' in 'wdp'.
+ *
+ * Possible error return values include:
+ *
+ * - ENODEV: No device named 'devname' exists.
+ *
+ * - ENOENT: 'devname' exists but it is not attached as a port on 'wdp'.
+ */
+int
+wdp_port_query_by_name(const struct wdp *wdp, const char *devname,
+ struct wdp_port *port)
+{
+ int error = wdp->wdp_class->port_query_by_name(wdp, devname, port);
+ if (!error) {
+ VLOG_DBG_RL(&wdpmsg_rl, "%s: device %s is on port %"PRIu16,
+ wdp_name(wdp), devname, port->opp.port_no);
+ } else {
+ wdp_port_clear(port);
+
+ /* Log level is DBG here because all the current callers are interested
+ * in whether 'wdp' actually has a port 'devname', so that it's not
+ * an issue worth logging if it doesn't. */
+ VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s",
+ wdp_name(wdp), devname, strerror(error));
+ }
+ return error;
+}
+
+/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and stores
+ * a copy of the port's name in '*namep'. On failure, returns a positive errno
+ * value and stores NULL in '*namep'.
+ *
+ * Error return values are the same as for wdp_port_query_by_name().
+ *
+ * The caller is responsible for freeing '*namep' (with free()). */
+int
+wdp_port_get_name(struct wdp *wdp, uint16_t port_no, char **namep)
+{
+ struct wdp_port port;
+ int error;
+
+ error = wdp_port_query_by_number(wdp, port_no, &port);
+ *namep = port.devname;
+ port.devname = NULL;
+ wdp_port_free(&port);
+
+ return error;
+}
+
+/* Obtains a list of all the ports in 'wdp', in no particular order.
+ *
+ * If successful, returns 0 and sets '*portsp' to point to an array of struct
+ * wdp_port and '*n_portsp' to the number of elements in the array. On
+ * failure, returns a positive errno value and sets '*portsp' to NULL and
+ * '*n_portsp' to 0.
+ *
+ * The caller is responsible for freeing '*portsp' and the individual wdp_port
+ * structures, e.g. with wdp_port_array_free(). */
+int
+wdp_port_list(const struct wdp *wdp,
+ struct wdp_port **portsp, size_t *n_portsp)
+{
+ int error;
+
+ error = wdp->wdp_class->port_list(wdp, portsp, n_portsp);
+ if (error) {
+ *portsp = NULL;
+ *n_portsp = 0;
+ }
+ log_operation(wdp, "port_list", error);
+ return error;
+}
+
+int
+wdp_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config)
+{
+ return wdp->wdp_class->port_set_config(wdp, port_no, config);
+}
+
+/* Polls for changes in the set of ports in 'wdp'. If the set of ports in
+ * 'wdp' has changed, this function does one of the following:
+ *
+ * - Stores the name of the device that was added to or deleted from 'wdp' in
+ * '*devnamep' and returns 0. The caller is responsible for freeing
+ * '*devnamep' (with free()) when it no longer needs it.
+ *
+ * - Returns ENOBUFS and sets '*devnamep' to NULL.
+ *
+ * This function may also return 'false positives', where it returns 0 and
+ * '*devnamep' names a device that was not actually added or deleted, or it
+ * returns ENOBUFS without any change.
+ *
+ * Returns EAGAIN if the set of ports in 'wdp' has not changed. May also
+ * return other positive errno values to indicate that something has gone
+ * wrong. */
+int
+wdp_port_poll(const struct wdp *wdp, char **devnamep)
+{
+ int error = (wdp->wdp_class->port_poll
+ ? wdp->wdp_class->port_poll(wdp, devnamep)
+ : EAGAIN);
+ if (error) {
+ *devnamep = NULL;
+ }
+ return error;
+}
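+
+/* Example (illustrative, 'wdp' is an open datapath): a main-loop fragment
+ * that consumes port change notifications according to the contract above
+ * (0, ENOBUFS, or EAGAIN), mirroring wx_port_run() in wdp-xflow.c:
+ *
+ *     char *devname;
+ *     int error;
+ *
+ *     while ((error = wdp_port_poll(wdp, &devname)) != EAGAIN) {
+ *         if (error == ENOBUFS) {
+ *             ...too many changes; re-query the entire port set...
+ *         } else if (!error) {
+ *             ...re-query 'devname' with wdp_port_query_by_name()...
+ *             free(devname);
+ *         }
+ *     }
+ *     wdp_port_poll_wait(wdp);
+ */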
+
+/* Arranges for the poll loop to wake up when port_poll(wdp) will return a
+ * value other than EAGAIN. */
+void
+wdp_port_poll_wait(const struct wdp *wdp)
+{
+ if (wdp->wdp_class->port_poll_wait) {
+ wdp->wdp_class->port_poll_wait(wdp);
+ }
+}
+
+/* Deletes all flows from 'wdp'. Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+wdp_flow_flush(struct wdp *wdp)
+{
+ int error;
+
+ COVERAGE_INC(wdp_flow_flush);
+
+ error = wdp->wdp_class->flow_flush(wdp);
+ log_operation(wdp, "flow_flush", error);
+ return error;
+}
+
+struct wdp_rule *
+wdp_flow_get(struct wdp *wdp, const flow_t *flow)
+{
+ return wdp->wdp_class->flow_get(wdp, flow);
+}
+
+struct wdp_rule *
+wdp_flow_match(struct wdp *wdp, const flow_t *flow)
+{
+ return wdp->wdp_class->flow_match(wdp, flow);
+}
+
+void
+wdp_flow_for_each_match(const struct wdp *wdp, const flow_t *target,
+ int include, wdp_flow_cb_func *callback, void *aux)
+{
+ wdp->wdp_class->flow_for_each_match(wdp, target, include,
+ callback, aux);
+}
+
+int
+wdp_flow_get_stats(const struct wdp *wdp, const struct wdp_rule *rule,
+ struct wdp_flow_stats *stats)
+{
+ int error = wdp->wdp_class->flow_get_stats(wdp, rule, stats);
+ if (error) {
+ memset(stats, 0, sizeof *stats);
+ }
+ return error;
+}
+
+bool
+wdp_flow_overlaps(const struct wdp *wdp, const flow_t *flow)
+{
+ return wdp->wdp_class->flow_overlaps(wdp, flow);
+}
+
+int
+wdp_flow_put(struct wdp *wdp, struct wdp_flow_put *put,
+ struct wdp_flow_stats *old_stats, struct wdp_rule **rulep)
+{
+ int error = wdp->wdp_class->flow_put(wdp, put, old_stats, rulep);
+ if (error) {
+ if (old_stats) {
+ memset(old_stats, 0, sizeof *old_stats);
+ }
+ if (rulep) {
+ *rulep = NULL;
+ }
+ }
+ return error;
+}
+
+int
+wdp_flow_delete(struct wdp *wdp, struct wdp_rule *rule,
+ struct wdp_flow_stats *final_stats)
+{
+ int error = wdp->wdp_class->flow_delete(wdp, rule, final_stats);
+ if (error && final_stats) {
+ memset(final_stats, 0, sizeof *final_stats);
+ }
+ return error;
+}
+
+int
+wdp_flow_inject(struct wdp *wdp, struct wdp_rule *rule,
+ uint16_t in_port, const struct ofpbuf *packet)
+{
+ return wdp->wdp_class->flow_inject(wdp, rule, in_port, packet);
+}
+
+int
+wdp_execute(struct wdp *wdp, uint16_t in_port,
+ const union ofp_action actions[], size_t n_actions,
+ const struct ofpbuf *buf)
+{
+ int error;
+
+ COVERAGE_INC(wdp_execute);
+ if (n_actions > 0) {
+ error = wdp->wdp_class->execute(wdp, in_port, actions,
+ n_actions, buf);
+ } else {
+ error = 0;
+ }
+ return error;
+}
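+
+/* Example (illustrative, 'wdp' is an open datapath): sending a packet out
+ * every port by executing a single output action, without touching the flow
+ * table. Assumes the OpenFlow protocol headers for OFPAT_OUTPUT and
+ * OFPP_FLOOD; 'packet' is an ofpbuf holding an Ethernet frame received on
+ * OpenFlow port 1:
+ *
+ *     union ofp_action action;
+ *
+ *     memset(&action, 0, sizeof action);
+ *     action.output.type = htons(OFPAT_OUTPUT);
+ *     action.output.len = htons(sizeof action);
+ *     action.output.port = htons(OFPP_FLOOD);
+ *     wdp_execute(wdp, 1, &action, 1, packet);
+ */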
+
+/* Retrieves 'wdp''s "listen mask" into '*listen_mask'. Each bit set in
+ * '*listen_mask' indicates that wdp_recv() will receive messages of the
+ * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+wdp_recv_get_mask(const struct wdp *wdp, int *listen_mask)
+{
+ int error = wdp->wdp_class->recv_get_mask(wdp, listen_mask);
+ if (error) {
+ *listen_mask = 0;
+ }
+ log_operation(wdp, "recv_get_mask", error);
+ return error;
+}
+
+/* Sets 'wdp''s "listen mask" to 'listen_mask'. Each bit set in
+ * 'listen_mask' requests that wdp_recv() receive messages of the
+ * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a
+ * positive errno value. */
+int
+wdp_recv_set_mask(struct wdp *wdp, int listen_mask)
+{
+ int error = wdp->wdp_class->recv_set_mask(wdp, listen_mask);
+ log_operation(wdp, "recv_set_mask", error);
+ return error;
+}
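+
+/* Example (illustrative, 'wdp' is an open datapath): listening for
+ * flow-table misses and OFPP_CONTROLLER actions, but not sFlow samples:
+ *
+ *     int error = wdp_recv_set_mask(wdp, ((1 << WDP_CHAN_MISS)
+ *                                         | (1 << WDP_CHAN_ACTION)));
+ *     if (error) {
+ *         VLOG_WARN("could not set listen mask (%s)", strerror(error));
+ *     }
+ */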
+
+/* Retrieves the sFlow sampling probability. '*probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'wdp' does not support sFlow sampling. */
+int
+wdp_get_sflow_probability(const struct wdp *wdp, uint32_t *probability)
+{
+ int error = (wdp->wdp_class->get_sflow_probability
+ ? wdp->wdp_class->get_sflow_probability(wdp, probability)
+ : EOPNOTSUPP);
+ if (error) {
+ *probability = 0;
+ }
+ log_operation(wdp, "get_sflow_probability", error);
+ return error;
+}
+
+/* Sets the sFlow sampling probability. 'probability' is expressed as the
+ * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is
+ * the probability of sampling a given packet.
+ *
+ * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
+ * indicates that 'wdp' does not support sFlow sampling. */
+int
+wdp_set_sflow_probability(struct wdp *wdp, uint32_t probability)
+{
+ int error = (wdp->wdp_class->set_sflow_probability
+ ? wdp->wdp_class->set_sflow_probability(wdp, probability)
+ : EOPNOTSUPP);
+ log_operation(wdp, "set_sflow_probability", error);
+ return error;
+}
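+
+/* Example (illustrative, 'wdp' is an open datapath): to sample roughly 1
+ * out of every N packets, scale N into the UINT_MAX-based fixed-point form
+ * described above (assumes <limits.h> for UINT_MAX):
+ *
+ *     const uint32_t N = 1000;
+ *
+ *     wdp_set_sflow_probability(wdp, UINT_MAX / N);
+ */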
+
+/* Attempts to receive a message from 'wdp'. If successful, stores the
+ * message into '*packetp'. Only messages of the types selected with
+ * wdp_set_listen_mask() will ordinarily be received (but if a message type
+ * is enabled and then later disabled, some stragglers might pop up).
+ *
+ * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
+ * if no message is immediately available. */
+int
+wdp_recv(struct wdp *wdp, struct wdp_packet *packet)
+{
+ int error = wdp->wdp_class->recv(wdp, packet);
+ if (!error) {
+ /* XXX vlog_dbg received packet */
+ } else {
+ memset(packet, 0, sizeof *packet);
+ packet->channel = -1;
+ }
+ return error;
+}
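+
+/* Example (illustrative, 'wdp' is an open datapath): draining every queued
+ * message, then sleeping until more arrive (poll_block() is declared in
+ * poll-loop.h); wdp_recv() returns EAGAIN when the queue is empty:
+ *
+ *     struct wdp_packet packet;
+ *
+ *     while (!wdp_recv(wdp, &packet)) {
+ *         ...process 'packet'...
+ *         ofpbuf_delete(packet.payload);
+ *     }
+ *     wdp_recv_wait(wdp);
+ *     poll_block();
+ */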
+
+/* Discards all messages that would otherwise be received by wdp_recv() on
+ * 'wdp'. Returns 0 if successful, otherwise a positive errno value. */
+int
+wdp_recv_purge(struct wdp *wdp)
+{
+ struct wdp_stats stats;
+ unsigned int i;
+ int error;
+
+ COVERAGE_INC(wdp_purge);
+
+ error = wdp_get_wdp_stats(wdp, &stats);
+ if (error) {
+ return error;
+ }
+
+ for (i = 0; i < (stats.max_miss_queue + stats.max_action_queue
+ + stats.max_sflow_queue); i++) {
+ struct wdp_packet packet;
+
+ error = wdp_recv(wdp, &packet);
+ if (error) {
+ return error == EAGAIN ? 0 : error;
+ }
+ ofpbuf_delete(packet.payload);
+ }
+ return 0;
+}
+
+/* Arranges for the poll loop to wake up when 'wdp' has a message queued to be
+ * received with wdp_recv(). */
+void
+wdp_recv_wait(struct wdp *wdp)
+{
+ wdp->wdp_class->recv_wait(wdp);
+}
+
+/* Obtains the NetFlow engine type and engine ID for 'wdp' into '*engine_type'
+ * and '*engine_id', respectively. */
+void
+wdp_get_netflow_ids(const struct wdp *wdp,
+ uint8_t *engine_type, uint8_t *engine_id)
+{
+ *engine_type = wdp->netflow_engine_type;
+ *engine_id = wdp->netflow_engine_id;
+}
+\f
++/* Returns a copy of 'old'. The packet's payload, if any, is copied as well,
++ * but if it is longer than 'trim' bytes it is truncated to that length. */
++struct wdp_packet *
++wdp_packet_clone(const struct wdp_packet *old, size_t trim)
++{
++ struct wdp_packet *new = xmemdup(old, sizeof *old);
++ if (old->payload) {
++ new->payload = ofpbuf_clone_data(old->payload->data,
++ MIN(trim, old->payload->size));
++ }
++ return new;
++}
++
+void
+wdp_packet_destroy(struct wdp_packet *packet)
+{
+ if (packet) {
+ ofpbuf_delete(packet->payload);
+ free(packet);
+ }
+}
+
+void
+wdp_init(struct wdp *wdp, const struct wdp_class *wdp_class,
+ const char *name,
+ uint8_t netflow_engine_type, uint8_t netflow_engine_id)
+{
+ wdp->wdp_class = wdp_class;
+ wdp->base_name = xstrdup(name);
+ wdp->full_name = xasprintf("%s@%s", wdp_class->type, name);
+ wdp->netflow_engine_type = netflow_engine_type;
+ wdp->netflow_engine_id = netflow_engine_id;
+}
+
+/* Undoes the results of initialization.
+ *
+ * Normally this function only needs to be called from wdp_close().
+ * However, it may be called by providers due to an error on opening
+ * that occurs after initialization. In this case wdp_close() would
+ * never be called. */
+void
+wdp_uninit(struct wdp *wdp, bool close)
+{
+ char *base_name = wdp->base_name;
+ char *full_name = wdp->full_name;
+
+ if (close) {
+ wdp->wdp_class->close(wdp);
+ }
+
+ free(base_name);
+ free(full_name);
+}
+\f
+static void
+log_operation(const struct wdp *wdp, const char *operation, int error)
+{
+ if (!error) {
+ VLOG_DBG_RL(&wdpmsg_rl, "%s: %s success", wdp_name(wdp), operation);
+ } else {
+ VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
+ wdp_name(wdp), operation, strerror(error));
+ }
+}
--- /dev/null
- union ofp_action *actions; /* OpenFlow actions. */
- int n_actions; /* Number of elements in 'actions' array. */
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WDP_H
+#define WDP_H 1
+
+#include "classifier.h"
+#include "list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ofpbuf;
+struct svec;
+struct wdp;
+struct wdp_class;
+union ofp_action;
+
+struct wdp_table_stats {
+ /* Flows. */
+ unsigned int n_flows; /* Number of flows in table. */
+ unsigned int cur_capacity; /* Current flow table capacity. */
+ unsigned int max_capacity; /* Maximum expansion of flow table capacity. */
+
+ /* Lookups. */
+ unsigned long long int n_hit; /* Number of flow table matches. */
+ unsigned long long int n_missed; /* Number of flow table misses. */
+ unsigned long long int n_lost; /* Misses dropped due to buffer limits. */
+};
+
+struct wdp_stats {
+ struct wdp_table_stats exact;
+ struct wdp_table_stats wild;
+
+ /* Ports. */
+ unsigned int n_ports; /* Current number of ports. */
+ unsigned int max_ports; /* Maximum supported number of ports. */
+
+ /* Lookups. */
+ unsigned long long int n_frags; /* Number of dropped IP fragments. */
+
+ /* Queues. */
+ unsigned int max_miss_queue; /* Max length of WDP_CHAN_MISS queue. */
+ unsigned int max_action_queue; /* Max length of WDP_CHAN_ACTION queue. */
+ unsigned int max_sflow_queue; /* Max length of WDP_CHAN_SFLOW queue. */
+};
+
+struct wdp_rule {
+ struct cls_rule cr;
+
+ long long int created; /* Time created, in ms since the epoch. */
+ uint16_t idle_timeout; /* In seconds from time of last use. */
+ uint16_t hard_timeout; /* In seconds from time of creation. */
+
++ /* OpenFlow actions.
++ *
++ * 'n_actions' is the number of elements in the 'actions' array. A single
++ * action may take up more than one element's worth of space.
++ *
++ * A subrule has no actions (it uses the super-rule's actions). */
++ union ofp_action *actions; /* OpenFlow actions. */
++ int n_actions; /* Number of elements in 'actions' array. */
++
+ void *client_data;
+};
+
+void wdp_rule_init(struct wdp_rule *, const union ofp_action *actions,
+ size_t n_actions);
+void wdp_rule_uninit(struct wdp_rule *);
+\f
+void wdp_run(void);
+void wdp_wait(void);
+
+int wdp_register_provider(const struct wdp_class *);
+int wdp_unregister_provider(const char *type);
+void wdp_enumerate_types(struct svec *types);
+
+int wdp_enumerate_names(const char *type, struct svec *names);
+void wdp_parse_name(const char *datapath_name, char **name, char **type);
+
+void wdp_run_expiration(struct wdp *);
+void wdp_run_revalidation(struct wdp *, bool revalidate_all);
+
+int wdp_open(const char *name, const char *type, struct wdp **);
+int wdp_create(const char *name, const char *type, struct wdp **);
+int wdp_create_and_open(const char *name, const char *type, struct wdp **);
+void wdp_close(struct wdp *);
+
+const char *wdp_name(const struct wdp *);
+const char *wdp_base_name(const struct wdp *);
+int wdp_get_all_names(const struct wdp *, struct svec *);
+
+int wdp_delete(struct wdp *);
+
+int wdp_get_features(const struct wdp *, struct ofpbuf **featuresp);
+int wdp_get_wdp_stats(const struct wdp *, struct wdp_stats *);
+
+int wdp_get_drop_frags(const struct wdp *, bool *drop_frags);
+int wdp_set_drop_frags(struct wdp *, bool drop_frags);
+
+struct wdp_port {
+ struct netdev *netdev;
+ struct ofp_phy_port opp; /* In *host* byte order. */
+ char *devname; /* Network device name. */
+ bool internal;
+};
+void wdp_port_clear(struct wdp_port *);
+void wdp_port_copy(struct wdp_port *, const struct wdp_port *);
+void wdp_port_free(struct wdp_port *);
+void wdp_port_array_free(struct wdp_port *, size_t n);
+
+int wdp_port_add(struct wdp *, const char *devname, bool internal,
+ uint16_t *port_no);
+int wdp_port_del(struct wdp *, uint16_t port_no);
+int wdp_port_query_by_number(const struct wdp *, uint16_t port_no,
+ struct wdp_port *);
+int wdp_port_query_by_name(const struct wdp *, const char *devname,
+ struct wdp_port *);
+int wdp_port_get_name(struct wdp *, uint16_t port_no, char **namep);
+int wdp_port_list(const struct wdp *, struct wdp_port **, size_t *n_ports);
+
+int wdp_port_set_config(struct wdp *, uint16_t port_no, uint32_t config);
+
+int wdp_port_poll(const struct wdp *, char **devnamep);
+void wdp_port_poll_wait(const struct wdp *);
+
+int wdp_flow_flush(struct wdp *);
+
+struct wdp_flow_stats {
+ unsigned long long int n_packets; /* Number of matched packets. */
+ unsigned long long int n_bytes; /* Number of matched bytes. */
+ long long int inserted; /* Time inserted into flow table. */
+ long long int used; /* Time last used. */
+ uint8_t tcp_flags; /* Bitwise-OR of TCP flags seen. */
+ uint8_t ip_tos; /* IP TOS for most recent packet. */
+};
+
+/* Finding and inspecting flows. */
+struct wdp_rule *wdp_flow_get(struct wdp *, const flow_t *);
+struct wdp_rule *wdp_flow_match(struct wdp *, const flow_t *);
+
+typedef void wdp_flow_cb_func(struct wdp_rule *, void *aux);
+void wdp_flow_for_each_match(const struct wdp *, const flow_t *,
+ int include, wdp_flow_cb_func *, void *aux);
+
+int wdp_flow_get_stats(const struct wdp *, const struct wdp_rule *,
+ struct wdp_flow_stats *);
+bool wdp_flow_overlaps(const struct wdp *, const flow_t *);
+
+/* Modifying flows. */
+enum wdp_flow_put_flags {
+ /* At least one of these flags should be set. */
+ WDP_PUT_CREATE = 1 << 0, /* Allow creating a new flow. */
+ WDP_PUT_MODIFY = 1 << 1, /* Allow modifying an existing flow. */
+
+ /* Options used only for modifying existing flows. */
+ WDP_PUT_COUNTERS = 1 << 2, /* Clear counters, TCP flags, IP TOS, used. */
+ WDP_PUT_ACTIONS = 1 << 3, /* Update actions. */
+ WDP_PUT_INSERTED = 1 << 4, /* Update 'inserted' to current time. */
+ WDP_PUT_TIMEOUTS = 1 << 5, /* Update 'idle_timeout' and 'hard_timeout'. */
+ WDP_PUT_ALL = (WDP_PUT_COUNTERS | WDP_PUT_ACTIONS
+ | WDP_PUT_INSERTED | WDP_PUT_TIMEOUTS)
+};
+
+struct wdp_flow_put {
+ enum wdp_flow_put_flags flags;
+
+ const flow_t *flow;
+
+ const union ofp_action *actions;
+ size_t n_actions;
+
+ unsigned short int idle_timeout;
+ unsigned short int hard_timeout;
+};
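+
+/* Example (illustrative): creating a flow, or replacing its actions and
+ * timeouts if it already exists, assuming 'wdp', 'flow', 'actions', and
+ * 'n_actions' are already set up:
+ *
+ *     struct wdp_flow_put put;
+ *
+ *     memset(&put, 0, sizeof put);
+ *     put.flags = WDP_PUT_CREATE | WDP_PUT_MODIFY | WDP_PUT_ACTIONS
+ *                 | WDP_PUT_TIMEOUTS;
+ *     put.flow = &flow;
+ *     put.actions = actions;
+ *     put.n_actions = n_actions;
+ *     put.idle_timeout = 60;
+ *     put.hard_timeout = 0;
+ *     wdp_flow_put(wdp, &put, NULL, NULL);
+ */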
+
+int wdp_flow_put(struct wdp *, struct wdp_flow_put *,
+ struct wdp_flow_stats *old_stats,
+ struct wdp_rule **rulep);
+int wdp_flow_delete(struct wdp *, struct wdp_rule *,
+ struct wdp_flow_stats *final_stats);
+
+/* Sending packets in flows. */
+int wdp_flow_inject(struct wdp *, struct wdp_rule *,
+ uint16_t in_port, const struct ofpbuf *);
+int wdp_execute(struct wdp *, uint16_t in_port,
+ const union ofp_action[], size_t n_actions,
+ const struct ofpbuf *);
+
+/* Receiving packets that miss the flow table. */
+enum wdp_channel {
+ WDP_CHAN_MISS, /* Packet missed in flow table. */
+ WDP_CHAN_ACTION, /* Packet output to OFPP_CONTROLLER. */
+ WDP_CHAN_SFLOW, /* sFlow samples. */
+ WDP_N_CHANS
+};
+
+struct wdp_packet {
+ struct list list;
+ enum wdp_channel channel;
++ uint32_t tun_id;
+ uint16_t in_port;
+ int send_len;
+ struct ofpbuf *payload;
+};
+
++struct wdp_packet *wdp_packet_clone(const struct wdp_packet *, size_t);
+void wdp_packet_destroy(struct wdp_packet *);
+
+int wdp_recv_get_mask(const struct wdp *, int *listen_mask);
+int wdp_recv_set_mask(struct wdp *, int listen_mask);
+int wdp_get_sflow_probability(const struct wdp *, uint32_t *probability);
+int wdp_set_sflow_probability(struct wdp *, uint32_t probability);
+int wdp_recv(struct wdp *, struct wdp_packet *);
+int wdp_recv_purge(struct wdp *);
+void wdp_recv_wait(struct wdp *);
+
+void wdp_get_netflow_ids(const struct wdp *,
+ uint8_t *engine_type, uint8_t *engine_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* wdp.h */
}
rule = xzalloc(sizeof *rule);
- cls_rule_from_flow(&rule->cls_rule, &flow);
- cls_rule_from_flow(&flow, wildcards, !wildcards ? UINT_MAX : priority,
- &rule->cls_rule);
++ cls_rule_from_flow(&flow, &rule->cls_rule);
return rule;
}
ovs_fatal(retval, "error reading pcap file");
}
- flow_extract(packet, 1, &flow);
- flow_to_match(&flow, &extracted_match);
+ flow_extract(packet, 0, 1, &flow);
- flow_to_match(&flow, 0, false, &extracted_match);
++ flow_to_match(&flow, false, &extracted_match);
if (memcmp(&expected_match, &extracted_match, sizeof expected_match)) {
char *exp_s = ofp_match_to_string(&expected_match, 2);
f->actions = actions;
f->n_actions = MAX_ACTIONS;
- xfif_flow_get(xfif, f);
- if (!dpif_flow_get(dpif, f)) {
++ if (!xfif_flow_get(xfif, f)) {
+
- ds_clear(&ds);
- format_xflow_flow(&ds, f);
- printf("%s\n", ds_cstr(&ds));
+ ds_clear(&ds);
- format_odp_flow(&ds, f);
++ format_xflow_flow(&ds, f);
+ printf("%s\n", ds_cstr(&ds));
+ }
}
ds_destroy(&ds);
- dpif_close(dpif);
+ xfif_close(xfif);
}
static void
}
static void
- open_vconn(const char *name, struct vconn **vconnp)
+ open_vconn__(const char *name, const char *default_suffix,
+ struct vconn **vconnp)
{
- struct dpif *dpif;
+ struct xfif *xfif;
struct stat s;
char *bridge_path, *datapath_name, *datapath_type;
- bridge_path = xasprintf("%s/%s.mgmt", ovs_rundir, name);
+ bridge_path = xasprintf("%s/%s.%s", ovs_rundir, name, default_suffix);
- dp_parse_name(name, &datapath_name, &datapath_type);
+ xf_parse_name(name, &datapath_name, &datapath_type);
if (strstr(name, ":")) {
run(vconn_open_block(name, OFP_VERSION, vconnp),
open_vconn_socket(name, vconnp);
} else if (!stat(bridge_path, &s) && S_ISSOCK(s.st_mode)) {
open_vconn_socket(bridge_path, vconnp);
- } else if (!dpif_open(datapath_name, datapath_type, &dpif)) {
- char dpif_name[IF_NAMESIZE + 1];
+ } else if (!xfif_open(datapath_name, datapath_type, &xfif)) {
+ char xfif_name[IF_NAMESIZE + 1];
char *socket_name;
- run(dpif_port_get_name(dpif, ODPP_LOCAL, dpif_name, sizeof dpif_name),
- "obtaining name of %s", dpif_name);
- dpif_close(dpif);
- if (strcmp(dpif_name, name)) {
- VLOG_INFO("datapath %s is named %s", name, dpif_name);
+ run(xfif_port_get_name(xfif, XFLOWP_LOCAL, xfif_name, sizeof xfif_name),
+ "obtaining name of %s", xfif_name);
+ xfif_close(xfif);
+ if (strcmp(xfif_name, name)) {
+ VLOG_INFO("datapath %s is named %s", name, xfif_name);
}
- socket_name = xasprintf("%s/%s.mgmt", ovs_rundir, xfif_name);
+ socket_name = xasprintf("%s/%s.%s",
- ovs_rundir, dpif_name, default_suffix);
++ ovs_rundir, xfif_name, default_suffix);
if (stat(socket_name, &s)) {
ovs_fatal(errno, "cannot connect to %s: stat failed on %s",
name, socket_name);
The mandatory \fIdatapath\fR argument specifies the local datapath
to relay. It takes one of the following forms:
.
-.so lib/dpif.man
+.so lib/xfif.man
.
.PP
- The optional \fIcontroller\fR argument specifies how to connect to
- the OpenFlow controller. It takes one of the following forms:
+ The optional \fIcontroller\fR arguments specify how to connect to
+ the OpenFlow controllers. Each takes one of the following forms:
.
.so lib/vconn-active.man
= stream_ssl_is_configured() ? "^ssl:.*" : "^tcp:.*";
}
- /* Mode of operation. */
- s->discovery = s->controller_name == NULL;
- if (s->discovery && !s->in_band) {
- ovs_fatal(0, "Cannot perform discovery with out-of-band control");
+ /* Rate limiting. */
+ if (controller_opts.rate_limit && controller_opts.rate_limit < 100) {
+ VLOG_WARN("Rate limit set to unusually low value %d",
+ controller_opts.rate_limit);
}
- /* Rate limiting. */
- if (s->rate_limit && s->rate_limit < 100) {
- VLOG_WARN("Rate limit set to unusually low value %d", s->rate_limit);
+ /* Local vconns. */
- dp_parse_name(argv[0], &s->dp_name, &s->dp_type);
++ xf_parse_name(argv[0], &s->dp_name, &s->dp_type);
+
+ /* Controllers. */
+ s->n_controllers = argc > 1 ? argc - 1 : 1;
+ s->controllers = xmalloc(s->n_controllers * sizeof *s->controllers);
+ if (argc > 1) {
+ size_t i;
+
+ for (i = 0; i < s->n_controllers; i++) {
+ s->controllers[i] = controller_opts;
+ s->controllers[i].target = argv[i + 1];
+ }
+ } else {
+ s->controllers[0] = controller_opts;
+ s->controllers[0].target = "discover";
+ }
+
+ /* Sanity check. */
+ if (controller_opts.band == OFPROTO_OUT_OF_BAND) {
+ size_t i;
+
+ for (i = 0; i < s->n_controllers; i++) {
+ if (!strcmp(s->controllers[i].target, "discover")) {
+ ovs_fatal(0, "Cannot perform discovery with out-of-band "
+ "control");
+ }
+ }
}
}
}
}
}
- svec_destroy(&dpif_names);
- svec_destroy(&dpif_types);
+ svec_destroy(&bridge_names);
+ svec_destroy(&xfif_names);
+ svec_destroy(&xfif_types);
unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows,
NULL);
port_destroy(br->ports[br->n_ports - 1]);
}
list_remove(&br->node);
- error = dpif_delete(br->dpif);
+ error = xfif_delete(br->xfif);
if (error && error != ENOENT) {
VLOG_ERR("failed to delete %s: %s",
- dpif_name(br->dpif), strerror(error));
+ xfif_name(br->xfif), strerror(error));
}
- dpif_close(br->dpif);
+ xfif_close(br->xfif);
ofproto_destroy(br->ofproto);
- free(br->controller);
mac_learning_destroy(br->ml);
port_array_destroy(&br->ifaces);
+ shash_destroy(&br->port_by_name);
+ shash_destroy(&br->iface_by_name);
free(br->ports);
free(br->name);
free(br);
action.output.len = htons(sizeof action);
action.output.port = htons(OFPP_NORMAL);
memset(&flow, 0, sizeof flow);
- flow.wildcards = OFPFW_ALL;
- ofproto_add_flow(br->ofproto, &flow, OVSFW_ALL, 0, &action, 1, 0);
++ flow.wildcards = OVSFW_ALL;
+ ofproto_add_flow(br->ofproto, &flow, &action, 1, 0);
+ } else {
+ struct ofproto_controller *ocs;
+ size_t i;
- ofproto_set_in_band(br->ofproto, false);
- ofproto_set_max_backoff(br->ofproto, 1);
- ofproto_set_probe_interval(br->ofproto, 5);
- ofproto_set_failure(br->ofproto, false);
- }
+ ocs = xmalloc(n_controllers * sizeof *ocs);
+ for (i = 0; i < n_controllers; i++) {
+ struct ovsrec_controller *c = controllers[i];
+ struct ofproto_controller *oc = &ocs[i];
+
+ if (strcmp(c->target, "discover")) {
+ struct iface *local_iface;
+ struct in_addr ip;
+
+ local_iface = bridge_get_local_iface(br);
+ if (local_iface && c->local_ip
+ && inet_aton(c->local_ip, &ip)) {
+ struct netdev *netdev = local_iface->netdev;
+ struct in_addr mask, gateway;
+
+ if (!c->local_netmask
+ || !inet_aton(c->local_netmask, &mask)) {
+ mask.s_addr = 0;
+ }
+ if (!c->local_gateway
+ || !inet_aton(c->local_gateway, &gateway)) {
+ gateway.s_addr = 0;
+ }
+
+ netdev_turn_flags_on(netdev, NETDEV_UP, true);
+ if (!mask.s_addr) {
+ mask.s_addr = guess_netmask(ip.s_addr);
+ }
+ if (!netdev_set_in4(netdev, ip, mask)) {
+ VLOG_INFO("bridge %s: configured IP address "IP_FMT", "
+ "netmask "IP_FMT,
+ br->name, IP_ARGS(&ip.s_addr),
+ IP_ARGS(&mask.s_addr));
+ }
+
+ if (gateway.s_addr) {
+ if (!netdev_add_router(netdev, gateway)) {
+ VLOG_INFO("bridge %s: configured gateway "IP_FMT,
+ br->name, IP_ARGS(&gateway.s_addr));
+ }
+ }
+ }
+ }
- ofproto_set_controller(br->ofproto, br->controller);
+ oc->target = c->target;
+ oc->max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8;
+ oc->probe_interval = (c->inactivity_probe
+ ? *c->inactivity_probe / 1000 : 5);
+ oc->fail = (!c->fail_mode
+ || !strcmp(c->fail_mode, "standalone")
+ || !strcmp(c->fail_mode, "open")
+ ? OFPROTO_FAIL_STANDALONE
+ : OFPROTO_FAIL_SECURE);
+ oc->band = (!c->connection_mode
+ || !strcmp(c->connection_mode, "in-band")
+ ? OFPROTO_IN_BAND
+ : OFPROTO_OUT_OF_BAND);
+ oc->accept_re = c->discover_accept_regex;
+ oc->update_resolv_conf = c->discover_update_resolv_conf;
+ oc->rate_limit = (c->controller_rate_limit
+ ? *c->controller_rate_limit : 0);
+ oc->burst_limit = (c->controller_burst_limit
+ ? *c->controller_burst_limit : 0);
+ }
+ ofproto_set_controllers(br->ofproto, ocs, n_controllers);
+ free(ocs);
+ }
}
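guess_netmask() is called above but not shown in this excerpt. A minimal
sketch, assuming the traditional classful heuristic (the real helper may
differ in detail):

    #include <stdint.h>
    #include <arpa/inet.h>

    /* Guess a netmask for 'ip' (network byte order) from its class:
     * class A -> /8, class B -> /16, class C -> /24, otherwise no guess. */
    static uint32_t
    guess_netmask(uint32_t ip)
    {
        ip = ntohl(ip);
        return ((ip >> 31) == 0 ? htonl(0xff000000u)   /* Class A: 0xxx */
                : (ip >> 30) == 2 ? htonl(0xffff0000u) /* Class B: 10xx */
                : (ip >> 29) == 6 ? htonl(0xffffff00u) /* Class C: 110x */
                : htonl(0));                           /* Multicast etc. */
    }

Under this heuristic 10.0.0.1 gets /8 and 192.168.1.1 gets /24, which
fits the fallback in the hunk above: the guess is consulted only when
the configured mask is zero.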
static void
{
struct iface *in_iface;
struct port *in_port;
- struct port *out_port = NULL; /* By default, drop the packet/flow. */
int vlan;
- int out_port_idx;
/* Find the interface and port structure for the received packet. */
- in_iface = iface_from_dp_ifidx(br, flow->in_port);
+ in_iface = iface_from_xf_ifidx(br, flow->in_port);
if (!in_iface) {
/* No interface? Something fishy... */
- if (packet != NULL) {
+ if (have_packet) {
/* Odd. A few possible reasons here:
*
* - We deleted an interface but there are still a few packets
}
}
- /* MAC learning. */
- out_port = FLOOD_PORT;
+ return true;
+ }
+
+ /* Returns true if the composed actions may be applied to any packet in the
+ * given 'flow'. Otherwise, the actions should only be applied to 'packet',
+ * or not at all if 'packet' was NULL. */
+ static bool
+ process_flow(struct bridge *br, const flow_t *flow,
- const struct ofpbuf *packet, struct odp_actions *actions,
++ const struct ofpbuf *packet, struct xflow_actions *actions,
+ tag_type *tags, uint16_t *nf_output_iface)
+ {
+ struct port *in_port;
+ struct port *out_port;
+ int vlan;
+ int out_port_idx;
+
+ /* Check whether we should drop packets in this flow. */
+ if (!is_admissible(br, flow, packet != NULL, tags, &vlan, &in_port)) {
+ out_port = NULL;
+ goto done;
+ }
+
/* Learn source MAC (but don't try to learn from revalidation). */
if (packet) {
update_learning_table(br, flow, vlan, in_port);
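For reference, the contract of is_admissible() as reconstructed from its
call site in process_flow() above (a sketch of the declaration, not the
verbatim source):

    /* Returns true if packets in 'flow' may be processed (learned from
     * and forwarded); returns false if they should be dropped.  On
     * success sets '*vlanp' to the flow's VLAN and '*in_portp' to its
     * input port.  'have_packet' is true when an actual packet, not just
     * a flow, is in hand, which permits logging about odd cases such as
     * packets arriving on a deleted interface. */
    static bool
    is_admissible(struct bridge *br, const flow_t *flow, bool have_packet,
                  tag_type *tags, int *vlanp, struct port **in_portp);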
void *br_)
{
struct bridge *br = br_;
- struct port *in_port;
- const union odp_action *a;
+ const union xflow_action *a;
+ struct port *in_port;
+ tag_type tags = 0;
+ int vlan;
/* Feed information from the active flows back into the learning table
* to ensure that table is always in sync with what is actually flowing
n_packets++;
compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177,
e->mac);
- flow_extract(&packet, XFLOWP_NONE, &flow);
- flow_extract(&packet, 0, ODPP_NONE, &flow);
++ flow_extract(&packet, 0, XFLOWP_NONE, &flow);
retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions,
&packet);
if (retval) {
bool del_active = port->active_iface == iface->port_ifidx;
struct iface *del;
- if (iface->dp_ifidx >= 0) {
- port_array_set(&br->ifaces, iface->dp_ifidx, NULL);
+ shash_find_and_delete_assert(&br->iface_by_name, iface->name);
+
+ if (iface->xf_ifidx >= 0) {
+ port_array_set(&br->ifaces, iface->xf_ifidx, NULL);
}
del = port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces];