From: Ben Pfaff Date: Thu, 20 May 2010 20:37:37 +0000 (-0700) Subject: Merge "master" into "wdp". X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=e20567b044dd19cd0a70b5e0cfdf2bf925739f45;p=sliver-openvswitch.git Merge "master" into "wdp". --- e20567b044dd19cd0a70b5e0cfdf2bf925739f45 diff --cc datapath/Modules.mk index 211f96fc2,ab9ae2992..22616e2ea --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@@ -23,9 -26,13 +26,13 @@@ openvswitch_headers = actions.h \ compat.h \ datapath.h \ - dp_dev.h \ dp_sysfs.h \ - flow.h + flow.h \ - odp-compat.h \ + table.h \ + vport.h \ + vport-internal_dev.h \ - vport-netdev.h ++ vport-netdev.h \ ++ xflow-compat.h dist_sources = $(foreach module,$(dist_modules),$($(module)_sources)) dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) diff --cc datapath/actions.c index 64e858849,fed9830fe..baa58b7db --- a/datapath/actions.c +++ b/datapath/actions.c @@@ -18,10 -18,11 +18,11 @@@ #include #include #include - #include "datapath.h" - #include "dp_dev.h" + #include "actions.h" + #include "datapath.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" + #include "vport.h" static struct sk_buff * make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) @@@ -58,6 -47,11 +47,11 @@@ return NULL; } -static void set_tunnel(struct sk_buff *skb, struct odp_flow_key *key, ++static void set_tunnel(struct sk_buff *skb, struct xflow_key *key, + __be32 tun_id) + { + OVS_CB(skb)->tun_id = key->tun_id = tun_id; + } static struct sk_buff * vlan_pull_tag(struct sk_buff *skb) @@@ -428,12 -414,12 +407,12 @@@ output_control(struct datapath *dp, str /* Send a copy of this packet up to the sFlow agent, along with extra * information about what happened to it. 
*/ static void sflow_sample(struct datapath *dp, struct sk_buff *skb, - const union odp_action *a, int n_actions, + const union xflow_action *a, int n_actions, - gfp_t gfp, struct net_bridge_port *nbp) + gfp_t gfp, struct dp_port *dp_port) { - struct odp_sflow_sample_header *hdr; - unsigned int actlen = n_actions * sizeof(union odp_action); - unsigned int hdrlen = sizeof(struct odp_sflow_sample_header); + struct xflow_sflow_sample_header *hdr; + unsigned int actlen = n_actions * sizeof(union xflow_action); + unsigned int hdrlen = sizeof(struct xflow_sflow_sample_header); struct sk_buff *nskb; nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp); @@@ -441,10 -427,10 +420,10 @@@ return; memcpy(__skb_push(nskb, actlen), a, actlen); - hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen); + hdr = (struct xflow_sflow_sample_header*)__skb_push(nskb, hdrlen); hdr->n_actions = n_actions; - hdr->sample_pool = atomic_read(&nbp->sflow_pool); + hdr->sample_pool = atomic_read(&dp_port->sflow_pool); - dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0); + dp_output_control(dp, nskb, _XFLOWL_SFLOW_NR, 0); } /* Execute a list of actions against 'skb'. 
*/ @@@ -495,7 -483,12 +476,11 @@@ int execute_actions(struct datapath *dp } break; - case ODPAT_SET_TUNNEL: ++ case XFLOWAT_SET_TUNNEL: + set_tunnel(skb, key, a->tunnel.tun_id); + break; + - case ODPAT_SET_VLAN_VID: - case ODPAT_SET_VLAN_PCP: + case XFLOWAT_SET_DL_TCI: skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp); if (IS_ERR(skb)) return PTR_ERR(skb); diff --cc datapath/actions.h index 9ad048692,9dfca3659..a1114649a --- a/datapath/actions.h +++ b/datapath/actions.h @@@ -13,13 -14,30 +14,30 @@@ struct datapath; struct sk_buff; -struct odp_flow_key; -union odp_action; +struct xflow_key; +union xflow_action; - int dp_xmit_skb(struct sk_buff *); int execute_actions(struct datapath *dp, struct sk_buff *skb, - struct odp_flow_key *key, - const union odp_action *, int n_actions, + struct xflow_key *key, + const union xflow_action *, int n_actions, gfp_t gfp); + static inline void + set_skb_csum_bits(const struct sk_buff *old_skb, struct sk_buff *new_skb) + { + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) + /* Before 2.6.24 these fields were not copied when + * doing an skb_copy_expand. */ + new_skb->ip_summed = old_skb->ip_summed; + new_skb->csum = old_skb->csum; + #endif + #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) + /* These fields are copied in skb_clone but not in + * skb_copy or related functions. We need to manually + * copy them over here. 
*/ + new_skb->proto_data_valid = old_skb->proto_data_valid; + new_skb->proto_csum_blank = old_skb->proto_csum_blank; + #endif + } + #endif /* actions.h */ diff --cc datapath/datapath.c index 7f02908e0,1d007b04b..b1d69a1e8 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@@ -42,13 -40,15 +40,15 @@@ #include #include #include - #include + #include -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "datapath.h" #include "actions.h" - #include "dp_dev.h" #include "flow.h" -#include "odp-compat.h" ++#include "xflow-compat.h" + #include "table.h" + #include "vport-internal_dev.h" #include "compat.h" @@@ -62,16 -62,16 +62,16 @@@ EXPORT_SYMBOL(dp_ioctl_hook) * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL * lock first. * - * It is safe to access the datapath and net_bridge_port structures with just + * It is safe to access the datapath and dp_port structures with just * dp_mutex. */ -static struct datapath *dps[ODP_MAX]; +static struct datapath *dps[XFLOW_MAX]; static DEFINE_MUTEX(dp_mutex); /* Number of milliseconds between runs of the maintenance thread. */ #define MAINT_SLEEP_MSECS 1000 - static int new_nbp(struct datapath *, struct net_device *, int port_no); -static int new_dp_port(struct datapath *, struct odp_port *, int port_no); ++static int new_dp_port(struct datapath *, struct xflow_port *, int port_no); /* Must be called with rcu_read_lock or dp_mutex. */ struct datapath *get_dp(int dp_idx) @@@ -94,6 -94,12 +94,12 @@@ static struct datapath *get_dp_locked(i return dp; } + /* Must be called with rcu_read_lock or RTNL lock. 
*/ + const char *dp_name(const struct datapath *dp) + { - return vport_get_name(dp->ports[ODPP_LOCAL]->vport); ++ return vport_get_name(dp->ports[XFLOWP_LOCAL]->vport); + } + static inline size_t br_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@@ -121,24 -134,26 +134,26 @@@ static int dp_fill_ifinfo(struct sk_buf hdr = nlmsg_data(nlh); hdr->ifi_family = AF_BRIDGE; hdr->__ifi_pad = 0; - hdr->ifi_type = dev->type; - hdr->ifi_index = dev->ifindex; - hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_type = ARPHRD_ETHER; + hdr->ifi_index = ifindex; + hdr->ifi_flags = vport_get_flags(port->vport); hdr->ifi_change = 0; - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[XFLOWP_LOCAL]->dev->ifindex); - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port->vport)); - NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[ODPP_LOCAL]->vport)); ++ NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[XFLOWP_LOCAL]->vport)); + NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port->vport)); #ifdef IFLA_OPERSTATE NLA_PUT_U8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN); + vport_is_running(port->vport) + ? vport_get_operstate(port->vport) + : IF_OPER_DOWN); #endif - if (dev->addr_len) - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, + vport_get_addr(port->vport)); - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); + if (ifindex != iflink) + NLA_PUT_U32(skb, IFLA_LINK,iflink); return nlmsg_end(skb, nlh); @@@ -183,7 -197,7 +197,7 @@@ static struct kobj_type dp_ktype = static int create_dp(int dp_idx, const char __user *devnamep) { - struct net_device *dp_dev; - struct odp_port internal_dev_port; ++ struct xflow_port internal_dev_port; char devname[IFNAMSIZ]; struct datapath *dp; int err; @@@ -234,14 -252,14 +252,14 @@@ goto err_free_dp; /* Set up our datapath device. 
*/ - dp_dev = dp_dev_create(dp, devname, XFLOWP_LOCAL); - err = PTR_ERR(dp_dev); - if (IS_ERR(dp_dev)) - goto err_destroy_table; - - err = new_nbp(dp, dp_dev, XFLOWP_LOCAL); + BUILD_BUG_ON(sizeof(internal_dev_port.devname) != sizeof(devname)); + strcpy(internal_dev_port.devname, devname); - internal_dev_port.flags = ODP_PORT_INTERNAL; - err = new_dp_port(dp, &internal_dev_port, ODPP_LOCAL); ++ internal_dev_port.flags = XFLOW_PORT_INTERNAL; ++ err = new_dp_port(dp, &internal_dev_port, XFLOWP_LOCAL); if (err) { - dp_dev_destroy(dp_dev); + if (err == -EBUSY) + err = -EEXIST; + goto err_destroy_table; } @@@ -259,9 -277,9 +277,9 @@@ return 0; err_destroy_local_port: - dp_del_port(dp->ports[XFLOWP_LOCAL]); - dp_detach_port(dp->ports[ODPP_LOCAL], 1); ++ dp_detach_port(dp->ports[XFLOWP_LOCAL], 1); err_destroy_table: - dp_table_destroy(dp->table, 0); + tbl_destroy(dp->table, NULL); err_free_dp: kfree(dp); err_put_module: @@@ -279,16 -297,16 +297,16 @@@ static void do_destroy_dp(struct datapa int i; list_for_each_entry_safe (p, n, &dp->port_list, node) - if (p->port_no != ODPP_LOCAL) + if (p->port_no != XFLOWP_LOCAL) - dp_del_port(p); + dp_detach_port(p, 1); dp_sysfs_del_dp(dp); rcu_assign_pointer(dps[dp->dp_idx], NULL); - dp_del_port(dp->ports[XFLOWP_LOCAL]); - dp_detach_port(dp->ports[ODPP_LOCAL], 1); ++ dp_detach_port(dp->ports[XFLOWP_LOCAL], 1); - dp_table_destroy(dp->table, 1); + tbl_destroy(dp->table, flow_free_tbl); for (i = 0; i < DP_N_QUEUES; i++) skb_queue_purge(&dp->queues[i]); @@@ -334,12 -352,26 +352,26 @@@ static struct kobj_type brport_ktype = }; /* Called with RTNL lock and dp_mutex. 
*/ - static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) -static int new_dp_port(struct datapath *dp, struct odp_port *odp_port, int port_no) ++static int new_dp_port(struct datapath *dp, struct xflow_port *xflow_port, int port_no) { - struct net_bridge_port *p; + struct vport *vport; + struct dp_port *p; + int err; + - vport = vport_locate(odp_port->devname); ++ vport = vport_locate(xflow_port->devname); + if (!vport) { + vport_lock(); + - if (odp_port->flags & ODP_PORT_INTERNAL) - vport = __vport_add(odp_port->devname, "internal", NULL); ++ if (xflow_port->flags & XFLOW_PORT_INTERNAL) ++ vport = __vport_add(xflow_port->devname, "internal", NULL); + else - vport = __vport_add(odp_port->devname, "netdev", NULL); ++ vport = __vport_add(xflow_port->devname, "netdev", NULL); - if (dev->br_port != NULL) - return -EBUSY; + vport_unlock(); + + if (IS_ERR(vport)) + return PTR_ERR(vport); + } p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) @@@ -374,11 -401,10 +401,10 @@@ return 0; } - static int add_port(int dp_idx, struct xflow_port __user *portp) -static int attach_port(int dp_idx, struct odp_port __user *portp) ++static int attach_port(int dp_idx, struct xflow_port __user *portp) { - struct net_device *dev; struct datapath *dp; - struct odp_port port; + struct xflow_port port; int port_no; int err; @@@ -437,11 -443,14 +443,14 @@@ out return err; } - int dp_del_port(struct net_bridge_port *p) + int dp_detach_port(struct dp_port *p, int may_delete) { + struct vport *vport = p->vport; + int err; + ASSERT_RTNL(); - if (p->port_no != ODPP_LOCAL) + if (p->port_no != XFLOWP_LOCAL) dp_sysfs_del_if(p); dp_ifinfo_notify(RTM_DELLINK, p); @@@ -531,21 -517,20 +517,20 @@@ void dp_process_received_packet(struct { struct datapath *dp = p->dp; struct dp_stats_percpu *stats; + int stats_counter_off; - struct odp_flow_key key; + struct xflow_key key; - struct sw_flow *flow; + struct tbl_node *flow_node; WARN_ON_ONCE(skb_shared(skb)); + skb_warn_if_lro(skb); - 
compute_ip_summed(skb, false); - - /* BHs are off so we don't have to use get_cpu()/put_cpu() here. */ - stats = percpu_ptr(dp->stats_percpu, smp_processor_id()); + OVS_CB(skb)->dp_port = p; - if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) { + if (flow_extract(skb, p ? p->port_no : XFLOWP_NONE, &key)) { if (dp->drop_frags) { kfree_skb(skb); - stats->n_frags++; - return; + stats_counter_off = offsetof(struct dp_stats_percpu, n_frags); + goto out; } } @@@ -555,53 -541,23 +541,23 @@@ flow_used(flow, skb); execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC); - stats->n_hit++; + stats_counter_off = offsetof(struct dp_stats_percpu, n_hit); } else { - stats->n_missed++; - dp_output_control(dp, skb, _XFLOWL_MISS_NR, 0); + stats_counter_off = offsetof(struct dp_stats_percpu, n_missed); - dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id); ++ dp_output_control(dp, skb, _XFLOWL_MISS_NR, OVS_CB(skb)->tun_id); } - } - /* - * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on - * different set of devices!) - */ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) - /* Called with rcu_read_lock and bottom-halves disabled. */ - static struct sk_buff *dp_frame_hook(struct net_bridge_port *p, - struct sk_buff *skb) - { - do_port_input(p, skb); - return NULL; - } - #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - /* Called with rcu_read_lock and bottom-halves disabled. */ - static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) - { - do_port_input(p, *pskb); - return 1; + out: + local_bh_disable(); + stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + (*(u64 *)((u8 *)stats + stats_counter_off))++; + local_bh_enable(); } - #else - #error - #endif #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) - /* This code is based on a skb_checksum_setup from net/dev/core.c from a - * combination of Lenny's 2.6.26 Xen kernel and Xen's - * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. 
We can't call this function - * directly because it isn't exported in all versions. */ - static int skb_pull_up_to(struct sk_buff *skb, void *ptr) - { - if (ptr < (void *)skb->tail) - return 1; - if (__pskb_pull_tail(skb, - ptr - (void *)skb->data - skb_headlen(skb))) { - return 1; - } else { - return 0; - } - } - + /* This code is based on skb_checksum_setup() from Xen's net/dev/core.c. We + * can't call this function directly because it isn't exported in all + * versions. */ int vswitch_skb_checksum_setup(struct sk_buff *skb) { struct iphdr *iph; @@@ -778,16 -732,13 +732,13 @@@ queue_control_packets(struct sk_buff *s int port_no; int err; - port_no = XFLOWP_LOCAL; - if (skb->dev) { - if (skb->dev->br_port) - port_no = skb->dev->br_port->port_no; - else if (is_dp_dev(skb->dev)) - port_no = dp_dev_priv(skb->dev)->port_no; - } + if (OVS_CB(skb)->dp_port) + port_no = OVS_CB(skb)->dp_port->port_no; + else - port_no = ODPP_LOCAL; ++ port_no = XFLOWP_LOCAL; do { - struct odp_msg *header; + struct xflow_msg *header; nskb = skb->next; skb->next = NULL; @@@ -998,26 -952,39 +953,37 @@@ static void clear_stats(struct sw_flow flow->byte_count = 0; } - static int put_flow(struct datapath *dp, struct xflow_flow_put __user *ufp) + static int expand_table(struct datapath *dp) { - struct xflow_flow_put uf; + struct tbl *old_table = rcu_dereference(dp->table); + struct tbl *new_table; + + new_table = tbl_expand(old_table); + if (IS_ERR(new_table)) + return PTR_ERR(new_table); + + rcu_assign_pointer(dp->table, new_table); + tbl_deferred_destroy(old_table, NULL); + + return 0; + } + -static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf, - struct odp_flow_stats *stats) ++static int do_put_flow(struct datapath *dp, struct xflow_flow_put *uf, ++ struct xflow_flow_stats *stats) + { + struct tbl_node *flow_node; struct sw_flow *flow; - struct dp_table *table; - struct xflow_flow_stats stats; + struct tbl *table; int error; - error = -EFAULT; - if (copy_from_user(&uf, ufp, 
sizeof(struct xflow_flow_put))) - goto error; - memset(uf->flow.key.reserved, 0, sizeof uf->flow.key.reserved); -- table = rcu_dereference(dp->table); - flow = dp_table_lookup(table, &uf.flow.key); - if (!flow) { + flow_node = tbl_lookup(table, &uf->flow.key, flow_hash(&uf->flow.key), flow_cmp); + if (!flow_node) { /* No such flow. */ struct sw_flow_actions *acts; error = -ENOENT; - if (!(uf.flags & XFLOWPF_CREATE)) - if (!(uf->flags & ODPPF_CREATE)) ++ if (!(uf->flags & XFLOWPF_CREATE)) goto error; /* Expand table, if necessary, to make room. */ @@@ -1049,11 -1012,11 +1011,11 @@@ rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - error = dp_table_insert(table, flow); + error = tbl_insert(table, &flow->tbl_node, flow_hash(&flow->key)); if (error) goto error_free_flow_acts; - dp->n_flows++; - memset(&stats, 0, sizeof(struct xflow_flow_stats)); + - memset(stats, 0, sizeof(struct odp_flow_stats)); ++ memset(stats, 0, sizeof(struct xflow_flow_stats)); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts, *new_acts; @@@ -1061,7 -1025,7 +1024,7 @@@ /* Bail out if we're not allowed to modify an existing flow. */ error = -EEXIST; - if (!(uf.flags & XFLOWPF_MODIFY)) - if (!(uf->flags & ODPPF_MODIFY)) ++ if (!(uf->flags & XFLOWPF_MODIFY)) goto error; /* Swap actions. */ @@@ -1080,17 -1044,13 +1043,13 @@@ } /* Fetch stats, then clear them if necessary. */ - spin_lock_irqsave(&flow->lock, flags); - get_stats(flow, &stats); - if (uf.flags & XFLOWPF_ZERO_STATS) + spin_lock_bh(&flow->lock); + get_stats(flow, stats); - if (uf->flags & ODPPF_ZERO_STATS) ++ if (uf->flags & XFLOWPF_ZERO_STATS) clear_stats(flow); - spin_unlock_irqrestore(&flow->lock, flags); + spin_unlock_bh(&flow->lock); } - /* Copy stats to userspace. 
*/ - if (__copy_to_user(&ufp->flow.stats, &stats, - sizeof(struct xflow_flow_stats))) - return -EFAULT; return 0; error_free_flow_acts: @@@ -1101,23 -1061,53 +1060,53 @@@ error return error; } - static int put_actions(const struct sw_flow *flow, struct xflow_flow __user *ufp) -static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) ++static int put_flow(struct datapath *dp, struct xflow_flow_put __user *ufp) + { - struct odp_flow_stats stats; - struct odp_flow_put uf; ++ struct xflow_flow_stats stats; ++ struct xflow_flow_put uf; + int error; + - if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put))) ++ if (copy_from_user(&uf, ufp, sizeof(struct xflow_flow_put))) + return -EFAULT; + + error = do_put_flow(dp, &uf, &stats); + if (error) + return error; + + if (copy_to_user(&ufp->flow.stats, &stats, - sizeof(struct odp_flow_stats))) ++ sizeof(struct xflow_flow_stats))) + return -EFAULT; + + return 0; + } + + static int do_answer_query(struct sw_flow *flow, u32 query_flags, - struct odp_flow_stats __user *ustats, - union odp_action __user *actions, ++ struct xflow_flow_stats __user *ustats, ++ union xflow_action __user *actions, + u32 __user *n_actionsp) { - union xflow_action __user *actions; struct sw_flow_actions *sf_acts; - struct odp_flow_stats stats; ++ struct xflow_flow_stats stats; u32 n_actions; - if (__get_user(actions, &ufp->actions) || - __get_user(n_actions, &ufp->n_actions)) + spin_lock_bh(&flow->lock); + get_stats(flow, &stats); - if (query_flags & ODPFF_ZERO_TCP_FLAGS) ++ if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) + flow->tcp_flags = 0; + + spin_unlock_bh(&flow->lock); + - if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) || ++ if (copy_to_user(ustats, &stats, sizeof(struct xflow_flow_stats)) || + get_user(n_actions, n_actionsp)) return -EFAULT; if (!n_actions) return 0; sf_acts = rcu_dereference(flow->sf_acts); - if (__put_user(sf_acts->n_actions, &ufp->n_actions) || + if (put_user(sf_acts->n_actions, n_actionsp) || 
(actions && copy_to_user(actions, sf_acts->actions, - sizeof(union odp_action) * + sizeof(union xflow_action) * min(sf_acts->n_actions, n_actions)))) return -EFAULT; @@@ -1125,75 -1115,77 +1114,75 @@@ } static int answer_query(struct sw_flow *flow, u32 query_flags, - struct odp_flow __user *ufp) + struct xflow_flow __user *ufp) { - struct xflow_flow_stats stats; - unsigned long int flags; - union odp_action *actions; ++ union xflow_action *actions; - spin_lock_irqsave(&flow->lock, flags); - get_stats(flow, &stats); + if (get_user(actions, &ufp->actions)) + return -EFAULT; - if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) { - flow->tcp_flags = 0; - } - spin_unlock_irqrestore(&flow->lock, flags); + return do_answer_query(flow, query_flags, + &ufp->stats, actions, &ufp->n_actions); + } - if (__copy_to_user(&ufp->stats, &stats, sizeof(struct xflow_flow_stats))) - return -EFAULT; - return put_actions(flow, ufp); -static struct sw_flow *do_del_flow(struct datapath *dp, struct odp_flow_key *key) ++static struct sw_flow *do_del_flow(struct datapath *dp, struct xflow_key *key) + { + struct tbl *table = rcu_dereference(dp->table); + struct tbl_node *flow_node; + int error; + - memset(key->reserved, 0, sizeof key->reserved); + flow_node = tbl_lookup(table, key, flow_hash(key), flow_cmp); + if (!flow_node) + return ERR_PTR(-ENOENT); + + error = tbl_remove(table, flow_node); + if (error) + return ERR_PTR(error); + + /* XXX Returned flow_node's statistics might lose a few packets, since + * other CPUs can be using this flow. We used to synchronize_rcu() to + * make sure that we get completely accurate stats, but that blows our + * performance, badly. 
*/ + return flow_cast(flow_node); } -static int del_flow(struct datapath *dp, struct odp_flow __user *ufp) +static int del_flow(struct datapath *dp, struct xflow_flow __user *ufp) { - struct dp_table *table = rcu_dereference(dp->table); - struct xflow_flow uf; struct sw_flow *flow; - struct odp_flow uf; ++ struct xflow_flow uf; int error; - error = -EFAULT; if (copy_from_user(&uf, ufp, sizeof uf)) - goto error; - - flow = dp_table_lookup(table, &uf.key); - error = -ENOENT; - if (!flow) - goto error; + return -EFAULT; - /* XXX redundant lookup */ - error = dp_table_delete(table, flow); - if (error) - goto error; + flow = do_del_flow(dp, &uf.key); + if (IS_ERR(flow)) + return PTR_ERR(flow); - /* XXX These statistics might lose a few packets, since other CPUs can - * be using this flow. We used to synchronize_rcu() to make sure that - * we get completely accurate stats, but that blows our performance, - * badly. */ - dp->n_flows--; error = answer_query(flow, 0, ufp); flow_deferred_free(flow); - - error: return error; } - static int query_flows(struct datapath *dp, const struct xflow_flowvec *flowvec) -static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec) ++static int do_query_flows(struct datapath *dp, const struct xflow_flowvec *flowvec) { - struct dp_table *table = rcu_dereference(dp->table); - int i; + struct tbl *table = rcu_dereference(dp->table); + u32 i; + for (i = 0; i < flowvec->n_flows; i++) { - struct __user xflow_flow *ufp = &flowvec->flows[i]; - struct odp_flow __user *ufp = &flowvec->flows[i]; - struct odp_flow uf; ++ struct xflow_flow __user *ufp = &flowvec->flows[i]; + struct xflow_flow uf; - struct sw_flow *flow; + struct tbl_node *flow_node; int error; - if (__copy_from_user(&uf, ufp, sizeof uf)) + if (copy_from_user(&uf, ufp, sizeof uf)) return -EFAULT; - memset(uf.key.reserved, 0, sizeof uf.key.reserved); - flow = dp_table_lookup(table, &uf.key); - if (!flow) - error = __put_user(ENOENT, &ufp->stats.error); + flow_node = 
tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp); + if (!flow_node) + error = put_user(ENOENT, &ufp->stats.error); else - error = answer_query(flow, uf.flags, ufp); + error = answer_query(flow_cast(flow_node), uf.flags, ufp); if (error) return -EFAULT; } @@@ -1201,18 -1193,19 +1190,19 @@@ } struct list_flows_cbdata { - struct odp_flow __user *uflows; + struct xflow_flow __user *uflows; - int n_flows; - int listed_flows; + u32 n_flows; + u32 listed_flows; }; - static int list_flow(struct sw_flow *flow, void *cbdata_) + static int list_flow(struct tbl_node *node, void *cbdata_) { + struct sw_flow *flow = flow_cast(node); struct list_flows_cbdata *cbdata = cbdata_; - struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; + struct xflow_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; int error; - if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key)) + if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key)) return -EFAULT; error = answer_query(flow, 0, ufp); if (error) @@@ -1223,7 -1216,7 +1213,7 @@@ return 0; } - static int list_flows(struct datapath *dp, const struct xflow_flowvec *flowvec) -static int do_list_flows(struct datapath *dp, const struct odp_flowvec *flowvec) ++static int do_list_flows(struct datapath *dp, const struct xflow_flowvec *flowvec) { struct list_flows_cbdata cbdata; int error; @@@ -1241,34 -1233,28 +1230,28 @@@ static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp, int (*function)(struct datapath *, - const struct odp_flowvec *)) + const struct xflow_flowvec *)) { - struct odp_flowvec __user *uflowvec; - struct odp_flowvec flowvec; + struct xflow_flowvec __user *uflowvec; + struct xflow_flowvec flowvec; int retval; - uflowvec = (struct odp_flowvec __user *)argp; + uflowvec = (struct xflow_flowvec __user *)argp; - if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) || - copy_from_user(&flowvec, uflowvec, sizeof flowvec)) + if (copy_from_user(&flowvec, uflowvec, sizeof flowvec)) 
return -EFAULT; - if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow)) + if (flowvec.n_flows > INT_MAX / sizeof(struct xflow_flow)) return -EINVAL; - if (!access_ok(VERIFY_WRITE, flowvec.flows, - flowvec.n_flows * sizeof(struct xflow_flow))) - return -EFAULT; - retval = function(dp, &flowvec); return (retval < 0 ? retval : retval == flowvec.n_flows ? 0 - : __put_user(retval, &uflowvec->n_flows)); + : put_user(retval, &uflowvec->n_flows)); } - static int do_execute(struct datapath *dp, const struct xflow_execute *executep) -static int do_execute(struct datapath *dp, const struct odp_execute *execute) ++static int do_execute(struct datapath *dp, const struct xflow_execute *execute) { - struct xflow_execute execute; - struct odp_flow_key key; + struct xflow_key key; struct sk_buff *skb; struct sw_flow_actions *actions; struct ethhdr *eth; @@@ -1336,14 -1322,25 +1319,25 @@@ error return err; } -static int execute_packet(struct datapath *dp, const struct odp_execute __user *executep) ++static int execute_packet(struct datapath *dp, const struct xflow_execute __user *executep) + { - struct odp_execute execute; ++ struct xflow_execute execute; + + if (copy_from_user(&execute, executep, sizeof execute)) + return -EFAULT; + + return do_execute(dp, &execute); + } + -static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) +static int get_dp_stats(struct datapath *dp, struct xflow_stats __user *statsp) { + struct tbl *table = rcu_dereference(dp->table); - struct odp_stats stats; + struct xflow_stats stats; int i; - stats.n_flows = dp->n_flows; - stats.cur_capacity = rcu_dereference(dp->table)->n_buckets; - stats.max_capacity = DP_MAX_BUCKETS; + stats.n_flows = tbl_count(table); + stats.cur_capacity = tbl_n_buckets(table); + stats.max_capacity = TBL_MAX_BUCKETS; stats.n_ports = dp->n_ports; stats.max_ports = DP_MAX_PORTS; stats.max_groups = DP_MAX_GROUPS; @@@ -1408,13 -1400,19 +1397,19 @@@ void set_internal_devs_mtu(const struc } static int - 
put_port(const struct net_bridge_port *p, struct xflow_port __user *uop) -put_port(const struct dp_port *p, struct odp_port __user *uop) ++put_port(const struct dp_port *p, struct xflow_port __user *uop) { - struct odp_port op; + struct xflow_port op; + memset(&op, 0, sizeof op); - strncpy(op.devname, p->dev->name, sizeof op.devname); + + rcu_read_lock(); + strncpy(op.devname, vport_get_name(p->vport), sizeof op.devname); + rcu_read_unlock(); + op.port = p->port_no; - op.flags = is_dp_dev(p->dev) ? XFLOW_PORT_INTERNAL : 0; - op.flags = is_internal_vport(p->vport) ? ODP_PORT_INTERNAL : 0; ++ op.flags = is_internal_vport(p->vport) ? XFLOW_PORT_INTERNAL : 0; + return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0; } @@@ -1456,25 -1465,36 +1462,36 @@@ error_unlock } static int - list_ports(struct datapath *dp, struct xflow_portvec __user *pvp) -do_list_ports(struct datapath *dp, struct odp_port __user *uports, int n_ports) ++do_list_ports(struct datapath *dp, struct xflow_port __user *uports, int n_ports) { - struct xflow_portvec pv; - struct net_bridge_port *p; - int idx; + int idx = 0; + if (n_ports) { + struct dp_port *p; - if (copy_from_user(&pv, pvp, sizeof pv)) - return -EFAULT; - - idx = 0; - if (pv.n_ports) { list_for_each_entry_rcu (p, &dp->port_list, node) { - if (put_port(p, &pv.ports[idx])) + if (put_port(p, &uports[idx])) return -EFAULT; - if (idx++ >= pv.n_ports) + if (idx++ >= n_ports) break; } } - return put_user(dp->n_ports, &pvp->n_ports); + return idx; + } + + static int -list_ports(struct datapath *dp, struct odp_portvec __user *upv) ++list_ports(struct datapath *dp, struct xflow_portvec __user *upv) + { - struct odp_portvec pv; ++ struct xflow_portvec pv; + int retval; + + if (copy_from_user(&pv, upv, sizeof pv)) + return -EFAULT; + + retval = do_list_ports(dp, pv.ports, pv.n_ports); + if (retval < 0) + return retval; + + return put_user(retval, &upv->n_ports); } /* RCU callback for freeing a dp_port_group */ @@@ -1524,11 -1537,9 +1534,9 @@@ error 
} static int - get_port_group(struct datapath *dp, struct xflow_port_group *upg) -set_port_group(struct datapath *dp, const struct odp_port_group __user *upg) ++set_port_group(struct datapath *dp, const struct xflow_port_group __user *upg) { - struct odp_port_group pg; + struct xflow_port_group pg; - struct dp_port_group *g; - u16 n_copy; if (copy_from_user(&pg, upg, sizeof pg)) return -EFAULT; @@@ -1547,6 -1569,16 +1566,16 @@@ do_get_port_group(struct datapath *dp return 0; } -static int get_port_group(struct datapath *dp, struct odp_port_group __user *upg) ++static int get_port_group(struct datapath *dp, struct xflow_port_group __user *upg) + { - struct odp_port_group pg; ++ struct xflow_port_group pg; + + if (copy_from_user(&pg, upg, sizeof pg)) + return -EFAULT; + + return do_get_port_group(dp, pg.ports, pg.n_ports, pg.group, &pg.n_ports); + } + static int get_listen_mask(const struct file *f) { return (long)f->private_data; @@@ -1576,14 -1608,46 +1605,46 @@@ static long openvswitch_ioctl(struct fi err = destroy_dp(dp_idx); goto exit; - case XFLOW_PORT_ADD: - err = add_port(dp_idx, (struct xflow_port __user *)argp); - case ODP_PORT_ATTACH: - err = attach_port(dp_idx, (struct odp_port __user *)argp); ++ case XFLOW_PORT_ATTACH: ++ err = attach_port(dp_idx, (struct xflow_port __user *)argp); goto exit; - case XFLOW_PORT_DEL: - case ODP_PORT_DETACH: ++ case XFLOW_PORT_DETACH: err = get_user(port_no, (int __user *)argp); if (!err) - err = del_port(dp_idx, port_no); + err = detach_port(dp_idx, port_no); + goto exit; + - case ODP_VPORT_ADD: - err = vport_add((struct odp_vport_add __user *)argp); ++ case XFLOW_VPORT_ADD: ++ err = vport_add((struct xflow_vport_add __user *)argp); + goto exit; + - case ODP_VPORT_MOD: - err = vport_mod((struct odp_vport_mod __user *)argp); ++ case XFLOW_VPORT_MOD: ++ err = vport_mod((struct xflow_vport_mod __user *)argp); + goto exit; + - case ODP_VPORT_DEL: ++ case XFLOW_VPORT_DEL: + err = vport_del((char __user *)argp); + goto exit; + - 
case ODP_VPORT_STATS_GET: - err = vport_stats_get((struct odp_vport_stats_req __user *)argp); ++ case XFLOW_VPORT_STATS_GET: ++ err = vport_stats_get((struct xflow_vport_stats_req __user *)argp); + goto exit; + - case ODP_VPORT_ETHER_GET: - err = vport_ether_get((struct odp_vport_ether __user *)argp); ++ case XFLOW_VPORT_ETHER_GET: ++ err = vport_ether_get((struct xflow_vport_ether __user *)argp); + goto exit; + - case ODP_VPORT_ETHER_SET: - err = vport_ether_set((struct odp_vport_ether __user *)argp); ++ case XFLOW_VPORT_ETHER_SET: ++ err = vport_ether_set((struct xflow_vport_ether __user *)argp); + goto exit; + - case ODP_VPORT_MTU_GET: - err = vport_mtu_get((struct odp_vport_mtu __user *)argp); ++ case XFLOW_VPORT_MTU_GET: ++ err = vport_mtu_get((struct xflow_vport_mtu __user *)argp); + goto exit; + - case ODP_VPORT_MTU_SET: - err = vport_mtu_set((struct odp_vport_mtu __user *)argp); ++ case XFLOW_VPORT_MTU_SET: ++ err = vport_mtu_set((struct xflow_vport_mtu __user *)argp); goto exit; } @@@ -1657,24 -1721,24 +1718,24 @@@ err = flush_flows(dp); break; - case ODP_FLOW_PUT: - err = put_flow(dp, (struct odp_flow_put __user *)argp); + case XFLOW_FLOW_PUT: + err = put_flow(dp, (struct xflow_flow_put __user *)argp); break; - case ODP_FLOW_DEL: - err = del_flow(dp, (struct odp_flow __user *)argp); + case XFLOW_FLOW_DEL: + err = del_flow(dp, (struct xflow_flow __user *)argp); break; - case ODP_FLOW_GET: + case XFLOW_FLOW_GET: - err = do_flowvec_ioctl(dp, argp, query_flows); + err = do_flowvec_ioctl(dp, argp, do_query_flows); break; - case ODP_FLOW_LIST: + case XFLOW_FLOW_LIST: - err = do_flowvec_ioctl(dp, argp, list_flows); + err = do_flowvec_ioctl(dp, argp, do_list_flows); break; - case ODP_EXECUTE: - err = execute_packet(dp, (struct odp_execute __user *)argp); + case XFLOW_EXECUTE: - err = do_execute(dp, (struct xflow_execute __user *)argp); ++ err = execute_packet(dp, (struct xflow_execute __user *)argp); break; default: @@@ -1696,6 -1760,311 +1757,311 @@@ static int 
dp_has_packet_of_interest(st return 0; } + #ifdef CONFIG_COMPAT -static int compat_list_ports(struct datapath *dp, struct compat_odp_portvec __user *upv) ++static int compat_list_ports(struct datapath *dp, struct compat_xflow_portvec __user *upv) + { - struct compat_odp_portvec pv; ++ struct compat_xflow_portvec pv; + int retval; + + if (copy_from_user(&pv, upv, sizeof pv)) + return -EFAULT; + + retval = do_list_ports(dp, compat_ptr(pv.ports), pv.n_ports); + if (retval < 0) + return retval; + + return put_user(retval, &upv->n_ports); + } + -static int compat_set_port_group(struct datapath *dp, const struct compat_odp_port_group __user *upg) ++static int compat_set_port_group(struct datapath *dp, const struct compat_xflow_port_group __user *upg) + { - struct compat_odp_port_group pg; ++ struct compat_xflow_port_group pg; + + if (copy_from_user(&pg, upg, sizeof pg)) + return -EFAULT; + + return do_set_port_group(dp, compat_ptr(pg.ports), pg.n_ports, pg.group); + } + -static int compat_get_port_group(struct datapath *dp, struct compat_odp_port_group __user *upg) ++static int compat_get_port_group(struct datapath *dp, struct compat_xflow_port_group __user *upg) + { - struct compat_odp_port_group pg; ++ struct compat_xflow_port_group pg; + + if (copy_from_user(&pg, upg, sizeof pg)) + return -EFAULT; + + return do_get_port_group(dp, compat_ptr(pg.ports), pg.n_ports, + pg.group, &pg.n_ports); + } + -static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat) ++static int compat_get_flow(struct xflow_flow *flow, const struct compat_xflow_flow __user *compat) + { + compat_uptr_t actions; + - if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) || - __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) || - __copy_from_user(&flow->key, &compat->key, sizeof(struct odp_flow_key)) || ++ if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_xflow_flow)) || ++ __copy_from_user(&flow->stats, 
&compat->stats, sizeof(struct xflow_flow_stats)) || ++ __copy_from_user(&flow->key, &compat->key, sizeof(struct xflow_key)) || + __get_user(actions, &compat->actions) || + __get_user(flow->n_actions, &compat->n_actions) || + __get_user(flow->flags, &compat->flags)) + return -EFAULT; + + flow->actions = compat_ptr(actions); + return 0; + } + -static int compat_put_flow(struct datapath *dp, struct compat_odp_flow_put __user *ufp) ++static int compat_put_flow(struct datapath *dp, struct compat_xflow_flow_put __user *ufp) + { - struct odp_flow_stats stats; - struct odp_flow_put fp; ++ struct xflow_flow_stats stats; ++ struct xflow_flow_put fp; + int error; + + if (compat_get_flow(&fp.flow, &ufp->flow) || + get_user(fp.flags, &ufp->flags)) + return -EFAULT; + + error = do_put_flow(dp, &fp, &stats); + if (error) + return error; + + if (copy_to_user(&ufp->flow.stats, &stats, - sizeof(struct odp_flow_stats))) ++ sizeof(struct xflow_flow_stats))) + return -EFAULT; + + return 0; + } + + static int compat_answer_query(struct sw_flow *flow, u32 query_flags, - struct compat_odp_flow __user *ufp) ++ struct compat_xflow_flow __user *ufp) + { + compat_uptr_t actions; + + if (get_user(actions, &ufp->actions)) + return -EFAULT; + + return do_answer_query(flow, query_flags, &ufp->stats, + compat_ptr(actions), &ufp->n_actions); + } + -static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *ufp) ++static int compat_del_flow(struct datapath *dp, struct compat_xflow_flow __user *ufp) + { + struct sw_flow *flow; - struct odp_flow uf; ++ struct xflow_flow uf; + int error; + + if (compat_get_flow(&uf, ufp)) + return -EFAULT; + + flow = do_del_flow(dp, &uf.key); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + error = compat_answer_query(flow, 0, ufp); + flow_deferred_free(flow); + return error; + } + -static int compat_query_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows) ++static int compat_query_flows(struct datapath *dp, struct 
compat_xflow_flow *flows, u32 n_flows) + { + struct tbl *table = rcu_dereference(dp->table); + u32 i; + + for (i = 0; i < n_flows; i++) { - struct compat_odp_flow __user *ufp = &flows[i]; - struct odp_flow uf; ++ struct compat_xflow_flow __user *ufp = &flows[i]; ++ struct xflow_flow uf; + struct tbl_node *flow_node; + int error; + + if (compat_get_flow(&uf, ufp)) + return -EFAULT; + memset(uf.key.reserved, 0, sizeof uf.key.reserved); + + flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp); + if (!flow_node) + error = put_user(ENOENT, &ufp->stats.error); + else + error = compat_answer_query(flow_cast(flow_node), uf.flags, ufp); + if (error) + return -EFAULT; + } + return n_flows; + } + + struct compat_list_flows_cbdata { - struct compat_odp_flow __user *uflows; ++ struct compat_xflow_flow __user *uflows; + u32 n_flows; + u32 listed_flows; + }; + + static int compat_list_flow(struct tbl_node *node, void *cbdata_) + { + struct sw_flow *flow = flow_cast(node); + struct compat_list_flows_cbdata *cbdata = cbdata_; - struct compat_odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; ++ struct compat_xflow_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; + int error; + + if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key)) + return -EFAULT; + error = compat_answer_query(flow, 0, ufp); + if (error) + return error; + + if (cbdata->listed_flows >= cbdata->n_flows) + return cbdata->listed_flows; + return 0; + } + -static int compat_list_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows) ++static int compat_list_flows(struct datapath *dp, struct compat_xflow_flow *flows, u32 n_flows) + { + struct compat_list_flows_cbdata cbdata; + int error; + + if (!n_flows) + return 0; + + cbdata.uflows = flows; + cbdata.n_flows = n_flows; + cbdata.listed_flows = 0; + error = tbl_foreach(rcu_dereference(dp->table), compat_list_flow, &cbdata); + return error ? 
error : cbdata.listed_flows; + } + + static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp, + int (*function)(struct datapath *, - struct compat_odp_flow *, ++ struct compat_xflow_flow *, + u32 n_flows)) + { - struct compat_odp_flowvec __user *uflowvec; - struct compat_odp_flow __user *flows; - struct compat_odp_flowvec flowvec; ++ struct compat_xflow_flowvec __user *uflowvec; ++ struct compat_xflow_flow __user *flows; ++ struct compat_xflow_flowvec flowvec; + int retval; + + uflowvec = compat_ptr(argp); + if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) || + copy_from_user(&flowvec, uflowvec, sizeof flowvec)) + return -EFAULT; + - if (flowvec.n_flows > INT_MAX / sizeof(struct compat_odp_flow)) ++ if (flowvec.n_flows > INT_MAX / sizeof(struct compat_xflow_flow)) + return -EINVAL; + + flows = compat_ptr(flowvec.flows); + if (!access_ok(VERIFY_WRITE, flows, - flowvec.n_flows * sizeof(struct compat_odp_flow))) ++ flowvec.n_flows * sizeof(struct compat_xflow_flow))) + return -EFAULT; + + retval = function(dp, flows, flowvec.n_flows); + return (retval < 0 ? retval + : retval == flowvec.n_flows ? 
0 + : put_user(retval, &uflowvec->n_flows)); + } + -static int compat_execute(struct datapath *dp, const struct compat_odp_execute __user *uexecute) ++static int compat_execute(struct datapath *dp, const struct compat_xflow_execute __user *uexecute) + { - struct odp_execute execute; ++ struct xflow_execute execute; + compat_uptr_t actions; + compat_uptr_t data; + - if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_odp_execute)) || ++ if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_xflow_execute)) || + __get_user(execute.in_port, &uexecute->in_port) || + __get_user(actions, &uexecute->actions) || + __get_user(execute.n_actions, &uexecute->n_actions) || + __get_user(data, &uexecute->data) || + __get_user(execute.length, &uexecute->length)) + return -EFAULT; + + execute.actions = compat_ptr(actions); + execute.data = compat_ptr(data); + + return do_execute(dp, &execute); + } + + static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp) + { + int dp_idx = iminor(f->f_dentry->d_inode); + struct datapath *dp; + int err; + + switch (cmd) { - case ODP_DP_DESTROY: - case ODP_FLOW_FLUSH: ++ case XFLOW_DP_DESTROY: ++ case XFLOW_FLOW_FLUSH: + /* Ioctls that don't need any translation at all. 
*/ + return openvswitch_ioctl(f, cmd, argp); + - case ODP_DP_CREATE: - case ODP_PORT_ATTACH: - case ODP_PORT_DETACH: - case ODP_VPORT_DEL: - case ODP_VPORT_MTU_SET: - case ODP_VPORT_MTU_GET: - case ODP_VPORT_ETHER_SET: - case ODP_VPORT_ETHER_GET: - case ODP_VPORT_STATS_GET: - case ODP_DP_STATS: - case ODP_GET_DROP_FRAGS: - case ODP_SET_DROP_FRAGS: - case ODP_SET_LISTEN_MASK: - case ODP_GET_LISTEN_MASK: - case ODP_SET_SFLOW_PROBABILITY: - case ODP_GET_SFLOW_PROBABILITY: - case ODP_PORT_QUERY: ++ case XFLOW_DP_CREATE: ++ case XFLOW_PORT_ATTACH: ++ case XFLOW_PORT_DETACH: ++ case XFLOW_VPORT_DEL: ++ case XFLOW_VPORT_MTU_SET: ++ case XFLOW_VPORT_MTU_GET: ++ case XFLOW_VPORT_ETHER_SET: ++ case XFLOW_VPORT_ETHER_GET: ++ case XFLOW_VPORT_STATS_GET: ++ case XFLOW_DP_STATS: ++ case XFLOW_GET_DROP_FRAGS: ++ case XFLOW_SET_DROP_FRAGS: ++ case XFLOW_SET_LISTEN_MASK: ++ case XFLOW_GET_LISTEN_MASK: ++ case XFLOW_SET_SFLOW_PROBABILITY: ++ case XFLOW_GET_SFLOW_PROBABILITY: ++ case XFLOW_PORT_QUERY: + /* Ioctls that just need their pointer argument extended. 
*/ + return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp)); + - case ODP_VPORT_ADD32: ++ case XFLOW_VPORT_ADD32: + return compat_vport_add(compat_ptr(argp)); + - case ODP_VPORT_MOD32: ++ case XFLOW_VPORT_MOD32: + return compat_vport_mod(compat_ptr(argp)); + } + + dp = get_dp_locked(dp_idx); + err = -ENODEV; + if (!dp) + goto exit; + + switch (cmd) { - case ODP_PORT_LIST32: ++ case XFLOW_PORT_LIST32: + err = compat_list_ports(dp, compat_ptr(argp)); + break; + - case ODP_PORT_GROUP_SET32: ++ case XFLOW_PORT_GROUP_SET32: + err = compat_set_port_group(dp, compat_ptr(argp)); + break; + - case ODP_PORT_GROUP_GET32: ++ case XFLOW_PORT_GROUP_GET32: + err = compat_get_port_group(dp, compat_ptr(argp)); + break; + - case ODP_FLOW_PUT32: ++ case XFLOW_FLOW_PUT32: + err = compat_put_flow(dp, compat_ptr(argp)); + break; + - case ODP_FLOW_DEL32: ++ case XFLOW_FLOW_DEL32: + err = compat_del_flow(dp, compat_ptr(argp)); + break; + - case ODP_FLOW_GET32: ++ case XFLOW_FLOW_GET32: + err = compat_flowvec_ioctl(dp, argp, compat_query_flows); + break; + - case ODP_FLOW_LIST32: ++ case XFLOW_FLOW_LIST32: + err = compat_flowvec_ioctl(dp, argp, compat_list_flows); + break; + - case ODP_EXECUTE32: ++ case XFLOW_EXECUTE32: + err = compat_execute(dp, compat_ptr(argp)); + break; + + default: + err = -ENOIOCTLCMD; + break; + } + mutex_unlock(&dp->mutex); + exit: + return err; + } + #endif + ssize_t openvswitch_read(struct file *f, char __user *buf, size_t nbytes, loff_t *ppos) { diff --cc datapath/datapath.h index ef5043296,8e272836e..8438e0501 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@@ -118,9 -71,9 +71,9 @@@ struct dp_port_group * @waitqueue: Waitqueue, for waiting for new packets in @queues. * @n_flows: Number of flows currently in flow table. * @table: Current flow table (RCU protected). - * @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected). + * @groups: Port groups, used by XFLOWAT_OUTPUT_GROUP action (RCU protected). 
* @n_ports: Number of ports currently in @ports. - * @ports: Map from port number to &struct net_bridge_port. %XFLOWP_LOCAL port - * @ports: Map from port number to &struct dp_port. %ODPP_LOCAL port ++ * @ports: Map from port number to &struct dp_port. %XFLOWP_LOCAL port * always exists, other ports may be %NULL. * @port_list: List of all ports in @ports in arbitrary order. * @stats_percpu: Per-CPU datapath statistics. diff --cc datapath/dp_sysfs_dp.c index 738d2e15d,91dd56f8f..5e1362c98 --- a/datapath/dp_sysfs_dp.c +++ b/datapath/dp_sysfs_dp.c @@@ -275,8 -280,8 +280,8 @@@ static INTERNAL_DEVICE_ATTR(root_id, S_ static ssize_t show_bridge_id(DEVICE_PARAMS, char *buf) { - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); - const unsigned char *addr = dp->ports[XFLOWP_LOCAL]->dev->dev_addr; + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); - const unsigned char *addr = vport_get_addr(dp->ports[ODPP_LOCAL]->vport); ++ const unsigned char *addr = vport_get_addr(dp->ports[XFLOWP_LOCAL]->vport); /* xxx Do we need a lock of some sort? */ return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", @@@ -464,7 -469,7 +469,7 @@@ static struct attribute_group bridge_gr */ int dp_sysfs_add_dp(struct datapath *dp) { - struct kobject *kobj = &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj; - struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport); ++ struct kobject *kobj = vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport); int err; /* Create /sys/class/net//bridge directory. 
*/ @@@ -493,7 -498,7 +498,7 @@@ int dp_sysfs_del_dp(struct datapath *dp) { - struct kobject *kobj = &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj; - struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport); ++ struct kobject *kobj = vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport); kobject_del(&dp->ifobj); sysfs_remove_group(kobj, &bridge_group); diff --cc datapath/dp_sysfs_if.c index c8e2a6f2e,e06037cbe..641496853 --- a/datapath/dp_sysfs_if.c +++ b/datapath/dp_sysfs_if.c @@@ -285,8 -290,7 +290,7 @@@ int dp_sysfs_add_if(struct dp_port *p /* Create symlink from /sys/class/net//brport/bridge to * /sys/class/net/. */ - err = sysfs_create_link(&p->kobj, - &dp->ports[XFLOWP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj, - err = sysfs_create_link(&p->kobj, vport_get_kobj(dp->ports[ODPP_LOCAL]->vport), ++ err = sysfs_create_link(&p->kobj, vport_get_kobj(dp->ports[XFLOWP_LOCAL]->vport), SYSFS_BRIDGE_PORT_LINK); /* "bridge" */ if (err) goto err_del; diff --cc datapath/flow.c index d75e7a8ca,548c729af..7265350d7 --- a/datapath/flow.c +++ b/datapath/flow.c @@@ -202,7 -209,9 +209,9 @@@ int flow_extract(struct sk_buff *skb, u int nh_ofs; memset(key, 0, sizeof *key); + key->tun_id = OVS_CB(skb)->tun_id; key->in_port = in_port; - key->dl_vlan = htons(ODP_VLAN_NONE); ++ key->dl_tci = htons(0); if (skb->len < sizeof *eth) return 0; @@@ -317,6 -327,24 +326,24 @@@ return retval; } + struct sw_flow *flow_cast(const struct tbl_node *node) + { + return container_of(node, struct sw_flow, tbl_node); + } + -u32 flow_hash(const struct odp_flow_key *key) ++u32 flow_hash(const struct xflow_key *key) + { + return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed); + } + + int flow_cmp(const struct tbl_node *node, void *key2_) + { - const struct odp_flow_key *key1 = &flow_cast(node)->key; - const struct odp_flow_key *key2 = key2_; ++ const struct xflow_key *key1 = &flow_cast(node)->key; ++ const struct xflow_key *key2 = key2_; + - return !memcmp(key1, key2, sizeof(struct 
odp_flow_key)); ++ return !memcmp(key1, key2, sizeof(struct xflow_key)); + } + /* Initializes the flow module. * Returns zero if successful or a negative error code. */ int flow_init(void) diff --cc datapath/flow.h index dc1e1535c,4a393cb90..5e2da5969 --- a/datapath/flow.h +++ b/datapath/flow.h @@@ -15,7 -15,8 +15,8 @@@ #include #include -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" + #include "table.h" struct sk_buff; @@@ -27,7 -28,9 +28,9 @@@ struct sw_flow_actions struct sw_flow { struct rcu_head rcu; + struct tbl_node tbl_node; + - struct odp_flow_key key; + struct xflow_key key; struct sw_flow_actions *sf_acts; struct timespec used; /* Last used time. */ @@@ -43,12 -46,16 +46,16 @@@ extern struct kmem_cache *flow_cache; struct sw_flow_actions *flow_actions_alloc(size_t n_actions); - void flow_free(struct sw_flow *); void flow_deferred_free(struct sw_flow *); void flow_deferred_free_acts(struct sw_flow_actions *); -int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); +int flow_extract(struct sk_buff *, u16 in_port, struct xflow_key *); void flow_used(struct sw_flow *, struct sk_buff *); + struct sw_flow *flow_cast(const struct tbl_node *); -u32 flow_hash(const struct odp_flow_key *key); ++u32 flow_hash(const struct xflow_key *key); + int flow_cmp(const struct tbl_node *, void *target); + void flow_free_tbl(struct tbl_node *); + int flow_init(void); void flow_exit(void); diff --cc datapath/linux-2.6/Modules.mk index 7d4fcd042,c63e3eaef..13f67511d --- a/datapath/linux-2.6/Modules.mk +++ b/datapath/linux-2.6/Modules.mk @@@ -10,8 -13,11 +13,12 @@@ openvswitch_headers += linux-2.6/compat-2.6/include/linux/dmi.h \ linux-2.6/compat-2.6/include/linux/err.h \ linux-2.6/compat-2.6/include/linux/icmp.h \ + linux-2.6/compat-2.6/include/linux/if.h \ linux-2.6/compat-2.6/include/linux/if_arp.h \ + linux-2.6/compat-2.6/include/linux/if_ether.h \ + linux-2.6/compat-2.6/include/linux/if_vlan.h \ + 
linux-2.6/compat-2.6/include/linux/in.h \ + linux-2.6/compat-2.6/include/linux/inetdevice.h \ linux-2.6/compat-2.6/include/linux/ip.h \ linux-2.6/compat-2.6/include/linux/ipv6.h \ linux-2.6/compat-2.6/include/linux/jiffies.h \ diff --cc datapath/vport-netdev.c index 000000000,8cc442107..24fa07716 mode 000000,100644..100644 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@@ -1,0 -1,381 +1,381 @@@ + /* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + + #include + #include + #include + #include + #include + #include + #include + + #include + + #include "datapath.h" + #include "vport-internal_dev.h" + #include "vport-netdev.h" + + #include "compat.h" + + struct vport_ops netdev_vport_ops; + + static void netdev_port_receive(struct net_bridge_port *, struct sk_buff *); + + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + static struct llc_sap *netdev_stp_sap; + + static int + netdev_stp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) + { + /* We don't really care about STP packets, we just listen for them for + * mutual exclusion with the bridge module, so this just discards + * them. */ + kfree_skb(skb); + return 0; + } + + static int + netdev_avoid_bridge_init(void) + { + /* Register to receive STP packets because the bridge module also + * attempts to do so. Since there can only be a single listener for a + * given protocol, this provides mutual exclusion against the bridge + * module, preventing both of them from being loaded at the same + * time. 
*/ + netdev_stp_sap = llc_sap_open(LLC_SAP_BSPAN, netdev_stp_rcv); + if (!netdev_stp_sap) { + printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n"); + return -EADDRINUSE; + } + return 0; + } + + static void + netdev_avoid_bridge_exit(void) + { + llc_sap_put(netdev_stp_sap); + } + #else /* Linux 2.6.27 or later. */ + static int + netdev_avoid_bridge_init(void) + { + /* Linux 2.6.27 introduces a way for multiple clients to register for + * STP packets, which interferes with what we try to do above. + * Instead, just check whether there's a bridge hook defined. This is + * not as safe--the bridge module is willing to load over the top of + * us--but it provides a little bit of protection. */ + if (br_handle_frame_hook) { + printk(KERN_ERR "openvswitch: bridge module is loaded, cannot load over it\n"); + return -EADDRINUSE; + } + return 0; + } + + static void + netdev_avoid_bridge_exit(void) + { + /* Nothing to do. */ + } + #endif /* Linux 2.6.27 or later */ + + /* + * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on + * different set of devices!) + */ + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + /* Called with rcu_read_lock and bottom-halves disabled. */ + static struct sk_buff * + netdev_frame_hook(struct net_bridge_port *p, struct sk_buff *skb) + { + netdev_port_receive(p, skb); + return NULL; + } + #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + /* Called with rcu_read_lock and bottom-halves disabled. */ + static int + netdev_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) + { + netdev_port_receive(p, *pskb); + return 1; + } + #else + #error + #endif + + static int + netdev_init(void) + { + int err; + + err = netdev_avoid_bridge_init(); + if (err) + return err; + + /* Hook into callback used by the bridge to intercept packets. + * Parasites we are. 
*/ + br_handle_frame_hook = netdev_frame_hook; + + return 0; + } + + static void + netdev_exit(void) + { + br_handle_frame_hook = NULL; + netdev_avoid_bridge_exit(); + } + + static struct vport * + netdev_create(const char *name, const void __user *config) + { + struct vport *vport; + struct netdev_vport *netdev_vport; + int err; + + vport = vport_alloc(sizeof(struct netdev_vport), &netdev_vport_ops); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto error; + } + + netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev = dev_get_by_name(&init_net, name); + if (!netdev_vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + + if (netdev_vport->dev->flags & IFF_LOOPBACK || + netdev_vport->dev->type != ARPHRD_ETHER || + is_internal_dev(netdev_vport->dev)) { + err = -EINVAL; + goto error_put; + } + + if (netdev_vport->dev->br_port) { + err = -EBUSY; + goto error_put; + } + + return vport; + + error_put: + dev_put(netdev_vport->dev); + error_free_vport: + vport_free(vport); + error: + return ERR_PTR(err); + } + + static int + netdev_destroy(struct vport *vport) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_put(netdev_vport->dev); + vport_free(vport); + + return 0; + } + + static int + netdev_attach(struct vport *vport) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_set_promiscuity(netdev_vport->dev, 1); + dev_disable_lro(netdev_vport->dev); + rcu_assign_pointer(netdev_vport->dev->br_port, (struct net_bridge_port *)vport); + + return 0; + } + + static int + netdev_detach(struct vport *vport) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + rcu_assign_pointer(netdev_vport->dev->br_port, NULL); + dev_set_promiscuity(netdev_vport->dev, -1); + + return 0; + } + + int + netdev_set_mtu(struct vport *vport, int mtu) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return dev_set_mtu(netdev_vport->dev, mtu); + } + + int + netdev_set_addr(struct vport *vport, 
const unsigned char *addr) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + struct sockaddr sa; + + sa.sa_family = ARPHRD_ETHER; + memcpy(sa.sa_data, addr, ETH_ALEN); + + return dev_set_mac_address(netdev_vport->dev, &sa); + } + + const char * + netdev_get_name(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->name; + } + + const unsigned char * + netdev_get_addr(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->dev_addr; + } + + struct kobject * + netdev_get_kobj(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return &netdev_vport->dev->NETDEV_DEV_MEMBER.kobj; + } + + int -netdev_get_stats(const struct vport *vport, struct odp_vport_stats *stats) ++netdev_get_stats(const struct vport *vport, struct xflow_vport_stats *stats) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + const struct net_device_stats *netdev_stats; + + netdev_stats = dev_get_stats(netdev_vport->dev); + + stats->rx_bytes = netdev_stats->rx_bytes; + stats->rx_packets = netdev_stats->rx_packets; + stats->tx_bytes = netdev_stats->tx_bytes; + stats->tx_packets = netdev_stats->tx_packets; + stats->rx_dropped = netdev_stats->rx_dropped; + stats->rx_errors = netdev_stats->rx_errors; + stats->rx_frame_err = netdev_stats->rx_frame_errors; + stats->rx_over_err = netdev_stats->rx_over_errors; + stats->rx_crc_err = netdev_stats->rx_crc_errors; + stats->tx_dropped = netdev_stats->tx_dropped; + stats->tx_errors = netdev_stats->tx_errors; + stats->collisions = netdev_stats->collisions; + + return 0; + } + + unsigned + netdev_get_dev_flags(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return dev_get_flags(netdev_vport->dev); + } + + int + netdev_is_running(const struct vport *vport) + { + const struct 
netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netif_running(netdev_vport->dev); + } + + unsigned char + netdev_get_operstate(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->operstate; + } + + int + netdev_get_ifindex(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->ifindex; + } + + int + netdev_get_iflink(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->iflink; + } + + int + netdev_get_mtu(const struct vport *vport) + { + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->mtu; + } + + /* Must be called with rcu_read_lock. */ + static void + netdev_port_receive(struct net_bridge_port *p, struct sk_buff *skb) + { + struct vport *vport = (struct vport *)p; + + /* Make our own copy of the packet. Otherwise we will mangle the + * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). + * (No one comes after us, since we tell handle_bridge() that we took + * the packet.) */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Push the Ethernet header back on. */ + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + compute_ip_summed(skb, false); + + vport_receive(vport, skb); + } + + static int + netdev_send(struct vport *vport, struct sk_buff *skb) + { + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + int len = skb->len; + + skb->dev = netdev_vport->dev; + forward_ip_summed(skb); + dev_queue_xmit(skb); + + return len; + } + + /* Returns null if this device is not attached to a datapath. 
*/ + struct vport * + netdev_get_vport(struct net_device *dev) + { + return (struct vport *)dev->br_port; + } + + struct vport_ops netdev_vport_ops = { + .type = "netdev", + .flags = VPORT_F_REQUIRED, + .init = netdev_init, + .exit = netdev_exit, + .create = netdev_create, + .destroy = netdev_destroy, + .attach = netdev_attach, + .detach = netdev_detach, + .set_mtu = netdev_set_mtu, + .set_addr = netdev_set_addr, + .get_name = netdev_get_name, + .get_addr = netdev_get_addr, + .get_kobj = netdev_get_kobj, + .get_stats = netdev_get_stats, + .get_dev_flags = netdev_get_dev_flags, + .is_running = netdev_is_running, + .get_operstate = netdev_get_operstate, + .get_ifindex = netdev_get_ifindex, + .get_iflink = netdev_get_iflink, + .get_mtu = netdev_get_mtu, + .send = netdev_send, + }; diff --cc datapath/vport-netdev.h index 000000000,19f176cda..54a9fbf37 mode 000000,100644..100644 --- a/datapath/vport-netdev.h +++ b/datapath/vport-netdev.h @@@ -1,0 -1,41 +1,41 @@@ + /* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. 
+ */ + + #ifndef VPORT_NETDEV_H + #define VPORT_NETDEV_H 1 + + #include + + #include "vport.h" + + struct vport *netdev_get_vport(struct net_device *dev); + + struct netdev_vport { + struct net_device *dev; + }; + + static inline struct netdev_vport * + netdev_vport_priv(const struct vport *vport) + { + return vport_priv(vport); + } + + int netdev_set_mtu(struct vport *, int mtu); + int netdev_set_addr(struct vport *, const unsigned char *addr); + const char *netdev_get_name(const struct vport *); + const unsigned char *netdev_get_addr(const struct vport *); + struct kobject *netdev_get_kobj(const struct vport *); -int netdev_get_stats(const struct vport *, struct odp_vport_stats *); ++int netdev_get_stats(const struct vport *, struct xflow_vport_stats *); + unsigned netdev_get_dev_flags(const struct vport *); + int netdev_is_running(const struct vport *); + unsigned char netdev_get_operstate(const struct vport *); + int netdev_get_ifindex(const struct vport *); + int netdev_get_iflink(const struct vport *); + int netdev_get_mtu(const struct vport *); + + #endif /* vport_netdev.h */ diff --cc datapath/vport.c index 000000000,691ab84ba..c7102ed98 mode 000000,100644..100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@@ -1,0 -1,1240 +1,1240 @@@ + /* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. 
+ */ + + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include "vport.h" + + extern struct vport_ops netdev_vport_ops; + extern struct vport_ops internal_vport_ops; + extern struct vport_ops gre_vport_ops; + + static struct vport_ops *base_vport_ops_list[] = { + &netdev_vport_ops, + &internal_vport_ops, + &gre_vport_ops, + }; + + static const struct vport_ops **vport_ops_list; + static int n_vport_types; + + static struct hlist_head *dev_table; + #define VPORT_HASH_BUCKETS 1024 + + /* Both RTNL lock and vport_mutex need to be held when updating dev_table. + * + * If you use vport_locate and then perform some operations, you need to hold + * one of these locks if you don't want the vport to be deleted out from under + * you. + * + * If you get a reference to a vport through a dp_port, it is protected + * by RCU and you need to hold rcu_read_lock instead when reading. + * + * If multiple locks are taken, the hierarchy is: + * 1. RTNL + * 2. DP + * 3. vport + */ + static DEFINE_MUTEX(vport_mutex); + + /** + * vport_lock - acquire vport lock + * + * Acquire global vport lock. See above comment about locking requirements + * and specific function definitions. May sleep. + */ + void + vport_lock(void) + { + mutex_lock(&vport_mutex); + } + + /** + * vport_unlock - release vport lock + * + * Release lock acquired with vport_lock. + */ + void + vport_unlock(void) + { + mutex_unlock(&vport_mutex); + } + + #define ASSERT_VPORT() do { \ + if (unlikely(!mutex_is_locked(&vport_mutex))) { \ + printk(KERN_ERR "openvswitch: vport lock not held at %s (%d)\n", \ + __FILE__, __LINE__); \ + dump_stack(); \ + } \ + } while(0) + + /** + * vport_init - initialize vport subsystem + * + * Called at module load time to initialize the vport subsystem and any + * compiled in vport types. 
+ */ + int + vport_init(void) + { + int err; + int i; + + dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dev_table) { + err = -ENOMEM; + goto error; + } + + vport_ops_list = kmalloc(ARRAY_SIZE(base_vport_ops_list) * + sizeof(struct vport_ops *), GFP_KERNEL); + if (!vport_ops_list) { + err = -ENOMEM; + goto error_dev_table; + } + + for (i = 0; i < ARRAY_SIZE(base_vport_ops_list); i++) { + struct vport_ops *new_ops = base_vport_ops_list[i]; + + if (new_ops->get_stats && new_ops->flags & VPORT_F_GEN_STATS) { + printk(KERN_INFO "openvswitch: both get_stats() and VPORT_F_GEN_STATS defined on vport %s, dropping VPORT_F_GEN_STATS\n", new_ops->type); + new_ops->flags &= ~VPORT_F_GEN_STATS; + } + + if (new_ops->init) + err = new_ops->init(); + else + err = 0; + + if (!err) + vport_ops_list[n_vport_types++] = new_ops; + else if (new_ops->flags & VPORT_F_REQUIRED) { + vport_exit(); + goto error; + } + } + + return 0; + + error_dev_table: + kfree(dev_table); + error: + return err; + } + + static void + vport_del_all(void) + { + int i; + + rtnl_lock(); + vport_lock(); + + for (i = 0; i < VPORT_HASH_BUCKETS; i++) { + struct hlist_head *bucket = &dev_table[i]; + struct vport *vport; + struct hlist_node *node, *next; + + hlist_for_each_entry_safe(vport, node, next, bucket, hash_node) + __vport_del(vport); + } + + vport_unlock(); + rtnl_unlock(); + } + + /** + * vport_exit - shutdown vport subsystem + * + * Called at module exit time to shutdown the vport subsystem and any + * initialized vport types. + */ + void + vport_exit(void) + { + int i; + + vport_del_all(); + + for (i = 0; i < n_vport_types; i++) { + if (vport_ops_list[i]->exit) + vport_ops_list[i]->exit(); + } + + kfree(vport_ops_list); + kfree(dev_table); + } + + /** + * vport_add - add vport device (for userspace callers) + * + * @uvport_config: New port configuration. + * + * Creates a new vport with the specified configuration (which is dependent + * on device type). 
This function is for userspace callers and assumes no + * locks are held. + */ + static int -do_vport_add(struct odp_vport_add *vport_config) ++do_vport_add(struct xflow_vport_add *vport_config) + { + struct vport *vport; + int err = 0; + + vport_config->port_type[VPORT_TYPE_SIZE - 1] = '\0'; + vport_config->devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + + vport = vport_locate(vport_config->devname); + if (vport) { + err = -EEXIST; + goto out; + } + + vport_lock(); + vport = __vport_add(vport_config->devname, vport_config->port_type, + vport_config->config); + vport_unlock(); + + if (IS_ERR(vport)) + err = PTR_ERR(vport); + + out: + rtnl_unlock(); + return err; + } + + int -vport_add(const struct odp_vport_add __user *uvport_config) ++vport_add(const struct xflow_vport_add __user *uvport_config) + { - struct odp_vport_add vport_config; ++ struct xflow_vport_add vport_config; + - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_add))) ++ if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_add))) + return -EFAULT; + + return do_vport_add(&vport_config); + } + + #ifdef CONFIG_COMPAT + int -compat_vport_add(struct compat_odp_vport_add *ucompat) ++compat_vport_add(struct compat_xflow_vport_add *ucompat) + { - struct compat_odp_vport_add compat; - struct odp_vport_add vport_config; ++ struct compat_xflow_vport_add compat; ++ struct xflow_vport_add vport_config; + - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_add))) ++ if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_add))) + return -EFAULT; + + memcpy(vport_config.port_type, compat.port_type, VPORT_TYPE_SIZE); + memcpy(vport_config.devname, compat.devname, IFNAMSIZ); + vport_config.config = compat_ptr(compat.config); + + return do_vport_add(&vport_config); + } + #endif + + /** + * vport_mod - modify existing vport device (for userspace callers) + * + * @uvport_config: New configuration for vport + * + * Modifies an existing 
device with the specified configuration (which is + * dependent on device type). This function is for userspace callers and + * assumes no locks are held. + */ + static int -do_vport_mod(struct odp_vport_mod *vport_config) ++do_vport_mod(struct xflow_vport_mod *vport_config) + { + struct vport *vport; + int err; + + vport_config->devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + + vport = vport_locate(vport_config->devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + vport_lock(); + err = __vport_mod(vport, vport_config->config); + vport_unlock(); + + out: + rtnl_unlock(); + return err; + } + + int -vport_mod(const struct odp_vport_mod __user *uvport_config) ++vport_mod(const struct xflow_vport_mod __user *uvport_config) + { - struct odp_vport_mod vport_config; ++ struct xflow_vport_mod vport_config; + - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_mod))) ++ if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_mod))) + return -EFAULT; + + return do_vport_mod(&vport_config); + } + + #ifdef CONFIG_COMPAT + int -compat_vport_mod(struct compat_odp_vport_mod *ucompat) ++compat_vport_mod(struct compat_xflow_vport_mod *ucompat) + { - struct compat_odp_vport_mod compat; - struct odp_vport_mod vport_config; ++ struct compat_xflow_vport_mod compat; ++ struct xflow_vport_mod vport_config; + - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_mod))) ++ if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_mod))) + return -EFAULT; + + memcpy(vport_config.devname, compat.devname, IFNAMSIZ); + vport_config.config = compat_ptr(compat.config); + + return do_vport_mod(&vport_config); + } + #endif + + /** + * vport_del - delete existing vport device (for userspace callers) + * + * @udevname: Name of device to delete + * + * Deletes the specified device. Detaches the device from a datapath first + * if it is attached. 
Deleting the device will fail if it does not exist or it + * is the datapath local port. It is also possible to fail for less obvious + * reasons, such as lack of memory. This function is for userspace callers and + * assumes no locks are held. + */ + int + vport_del(const char __user *udevname) + { + char devname[IFNAMSIZ]; + struct vport *vport; + struct dp_port *dp_port; + int err = 0; + int retval; + + retval = strncpy_from_user(devname, udevname, IFNAMSIZ); + if (retval < 0) + return -EFAULT; + else if (retval >= IFNAMSIZ) + return -ENAMETOOLONG; + + rtnl_lock(); + + vport = vport_locate(devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + dp_port = vport_get_dp_port(vport); + if (dp_port) { + struct datapath *dp = dp_port->dp; + + mutex_lock(&dp->mutex); + + if (!strcmp(dp_name(dp), devname)) { + err = -EINVAL; + goto dp_port_out; + } + + err = dp_detach_port(dp_port, 0); + + dp_port_out: + mutex_unlock(&dp->mutex); + + if (err) + goto out; + } + + vport_lock(); + err = __vport_del(vport); + vport_unlock(); + + out: + rtnl_unlock(); + return err; + } + + /** + * vport_stats_get - retrieve device stats (for userspace callers) + * + * @ustats_req: Stats request parameters. + * + * Retrieves transmit, receive, and error stats for the given device. This + * function is for userspace callers and assumes no locks are held. 
+ */ + int -vport_stats_get(struct odp_vport_stats_req __user *ustats_req) ++vport_stats_get(struct xflow_vport_stats_req __user *ustats_req) + { - struct odp_vport_stats_req stats_req; ++ struct xflow_vport_stats_req stats_req; + struct vport *vport; + int err; + - if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req))) ++ if (copy_from_user(&stats_req, ustats_req, sizeof(struct xflow_vport_stats_req))) + return -EFAULT; + + stats_req.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(stats_req.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + if (vport->ops->get_stats) { + rcu_read_lock(); + err = vport->ops->get_stats(vport, &stats_req.stats); + rcu_read_unlock(); + + } else if (vport->ops->flags & VPORT_F_GEN_STATS) { + int i; + - memset(&stats_req.stats, 0, sizeof(struct odp_vport_stats)); ++ memset(&stats_req.stats, 0, sizeof(struct xflow_vport_stats)); + + for_each_possible_cpu(i) { + const struct vport_percpu_stats *percpu_stats; + + percpu_stats = per_cpu_ptr(vport->percpu_stats, i); + stats_req.stats.rx_bytes += percpu_stats->rx_bytes; + stats_req.stats.rx_packets += percpu_stats->rx_packets; + stats_req.stats.tx_bytes += percpu_stats->tx_bytes; + stats_req.stats.tx_packets += percpu_stats->tx_packets; + } + + spin_lock_bh(&vport->err_stats.lock); + + stats_req.stats.rx_dropped = vport->err_stats.rx_dropped; + stats_req.stats.rx_errors = vport->err_stats.rx_errors + + vport->err_stats.rx_frame_err + + vport->err_stats.rx_over_err + + vport->err_stats.rx_crc_err; + stats_req.stats.rx_frame_err = vport->err_stats.rx_frame_err; + stats_req.stats.rx_over_err = vport->err_stats.rx_over_err; + stats_req.stats.rx_crc_err = vport->err_stats.rx_crc_err; + stats_req.stats.tx_dropped = vport->err_stats.tx_dropped; + stats_req.stats.tx_errors = vport->err_stats.tx_errors; + stats_req.stats.collisions = vport->err_stats.collisions; + + spin_unlock_bh(&vport->err_stats.lock); + + err = 0; + } else + err = 
-EOPNOTSUPP; + + out: + vport_unlock(); + + if (!err) - if (copy_to_user(ustats_req, &stats_req, sizeof(struct odp_vport_stats_req))) ++ if (copy_to_user(ustats_req, &stats_req, sizeof(struct xflow_vport_stats_req))) + err = -EFAULT; + + return err; + } + + /** + * vport_ether_get - retrieve device Ethernet address (for userspace callers) + * + * @uvport_ether: Ethernet address request parameters. + * + * Retrieves the Ethernet address of the given device. This function is for + * userspace callers and assumes no locks are held. + */ + int -vport_ether_get(struct odp_vport_ether __user *uvport_ether) ++vport_ether_get(struct xflow_vport_ether __user *uvport_ether) + { - struct odp_vport_ether vport_ether; ++ struct xflow_vport_ether vport_ether; + struct vport *vport; + int err = 0; + - if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) ++ if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct xflow_vport_ether))) + return -EFAULT; + + vport_ether.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(vport_ether.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + rcu_read_lock(); + memcpy(vport_ether.ether_addr, vport_get_addr(vport), ETH_ALEN); + rcu_read_unlock(); + + out: + vport_unlock(); + + if (!err) - if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct odp_vport_ether))) ++ if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct xflow_vport_ether))) + err = -EFAULT; + + return err; + } + + /** + * vport_ether_set - set device Ethernet address (for userspace callers) + * + * @uvport_ether: Ethernet address request parameters. + * + * Sets the Ethernet address of the given device. Some devices may not support + * setting the Ethernet address, in which case the result will always be + * -EOPNOTSUPP. This function is for userspace callers and assumes no locks + * are held. 
+ */ + int -vport_ether_set(struct odp_vport_ether __user *uvport_ether) ++vport_ether_set(struct xflow_vport_ether __user *uvport_ether) + { - struct odp_vport_ether vport_ether; ++ struct xflow_vport_ether vport_ether; + struct vport *vport; + int err; + - if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) ++ if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct xflow_vport_ether))) + return -EFAULT; + + vport_ether.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + vport_lock(); + + vport = vport_locate(vport_ether.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + err = vport_set_addr(vport, vport_ether.ether_addr); + + out: + vport_unlock(); + rtnl_unlock(); + return err; + } + + /** + * vport_mut_get - retrieve device MTU (for userspace callers) + * + * @uvport_mtu: MTU request parameters. + * + * Retrieves the MTU of the given device. This function is for userspace + * callers and assumes no locks are held. + */ + int -vport_mtu_get(struct odp_vport_mtu __user *uvport_mtu) ++vport_mtu_get(struct xflow_vport_mtu __user *uvport_mtu) + { - struct odp_vport_mtu vport_mtu; ++ struct xflow_vport_mtu vport_mtu; + struct vport *vport; + int err = 0; + - if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) ++ if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct xflow_vport_mtu))) + return -EFAULT; + + vport_mtu.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(vport_mtu.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + vport_mtu.mtu = vport_get_mtu(vport); + + out: + vport_unlock(); + + if (!err) - if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct odp_vport_mtu))) ++ if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct xflow_vport_mtu))) + err = -EFAULT; + + return err; + } + + /** + * vport_mtu_set - set device MTU (for userspace callers) + * + * @uvport_mtu: MTU request parameters. + * + * Sets the MTU of the given device. 
Some devices may not support setting the + * MTU, in which case the result will always be -EOPNOTSUPP. This function is + * for userspace callers and assumes no locks are held. + */ + int -vport_mtu_set(struct odp_vport_mtu __user *uvport_mtu) ++vport_mtu_set(struct xflow_vport_mtu __user *uvport_mtu) + { - struct odp_vport_mtu vport_mtu; ++ struct xflow_vport_mtu vport_mtu; + struct vport *vport; + int err; + - if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) ++ if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct xflow_vport_mtu))) + return -EFAULT; + + vport_mtu.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + vport_lock(); + + vport = vport_locate(vport_mtu.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + err = vport_set_mtu(vport, vport_mtu.mtu); + + out: + vport_unlock(); + rtnl_unlock(); + return err; + } + + static struct hlist_head * + hash_bucket(const char *name) + { + unsigned int hash = full_name_hash(name, strlen(name)); + return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; + } + + /** + * vport_locate - find a port that has already been created + * + * @name: name of port to find + * + * Either RTNL or vport lock must be acquired before calling this function + * and held while using the found port. See the locking comments at the + * top of the file. 
+ */ + struct vport * + vport_locate(const char *name) + { + struct hlist_head *bucket = hash_bucket(name); + struct vport *vport; + struct hlist_node *node; + + if (unlikely(!mutex_is_locked(&vport_mutex) && !rtnl_is_locked())) { + printk(KERN_ERR "openvswitch: neither RTNL nor vport lock held in vport_locate\n"); + dump_stack(); + } + + rcu_read_lock(); + + hlist_for_each_entry(vport, node, bucket, hash_node) + if (!strcmp(name, vport_get_name(vport))) + goto out; + + vport = NULL; + + out: + rcu_read_unlock(); + return vport; + } + + static void + register_vport(struct vport *vport) + { + hlist_add_head(&vport->hash_node, hash_bucket(vport_get_name(vport))); + } + + static void + unregister_vport(struct vport *vport) + { + hlist_del(&vport->hash_node); + } + + /** + * vport_alloc - allocate and initialize new vport + * + * @priv_size: Size of private data area to allocate. + * @ops: vport device ops + * + * Allocate and initialize a new vport defined by @ops. The vport will contain + * a private data area of size @priv_size that can be accessed using + * vport_priv(). vports that are no longer needed should be released with + * vport_free(). + */ + struct vport * + vport_alloc(int priv_size, const struct vport_ops *ops) + { + struct vport *vport; + size_t alloc_size; + + alloc_size = sizeof(struct vport); + if (priv_size) { + alloc_size = ALIGN(alloc_size, VPORT_ALIGN); + alloc_size += priv_size; + } + + vport = kzalloc(alloc_size, GFP_KERNEL); + if (!vport) + return ERR_PTR(-ENOMEM); + + vport->ops = ops; + + if (vport->ops->flags & VPORT_F_GEN_STATS) { + vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); + if (!vport->percpu_stats) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&vport->err_stats.lock); + } + + return vport; + } + + /** + * vport_free - uninitialize and free vport + * + * @vport: vport to free + * + * Frees a vport allocated with vport_alloc() when it is no longer needed. 
+ */ + void + vport_free(struct vport *vport) + { + if (vport->ops->flags & VPORT_F_GEN_STATS) + free_percpu(vport->percpu_stats); + + kfree(vport); + } + + /** + * __vport_add - add vport device (for kernel callers) + * + * @name: Name of new device. + * @type: Type of new device (to be matched against types in registered vport + * ops). + * @config: Device type specific configuration. Userspace pointer. + * + * Creates a new vport with the specified configuration (which is dependent + * on device type). Both RTNL and vport locks must be held. + */ + struct vport * + __vport_add(const char *name, const char *type, const void __user *config) + { + struct vport *vport; + int err = 0; + int i; + + ASSERT_RTNL(); + ASSERT_VPORT(); + + for (i = 0; i < n_vport_types; i++) { + if (!strcmp(vport_ops_list[i]->type, type)) { + vport = vport_ops_list[i]->create(name, config); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto out; + } + + register_vport(vport); + return vport; + } + } + + err = -EAFNOSUPPORT; + + out: + return ERR_PTR(err); + } + + /** + * __vport_mod - modify existing vport device (for kernel callers) + * + * @vport: vport to modify. + * @config: Device type specific configuration. Userspace pointer. + * + * Modifies an existing device with the specified configuration (which is + * dependent on device type). Both RTNL and vport locks must be held. + */ + int + __vport_mod(struct vport *vport, const void __user *config) + { + ASSERT_RTNL(); + ASSERT_VPORT(); + + if (vport->ops->modify) + return vport->ops->modify(vport, config); + else + return -EOPNOTSUPP; + } + + /** + * __vport_del - delete existing vport device (for kernel callers) + * + * @vport: vport to delete. + * + * Deletes the specified device. The device must not be currently attached to + * a datapath. It is possible to fail for reasons such as lack of memory. + * Both RTNL and vport locks must be held. 
+ */ + int + __vport_del(struct vport *vport) + { + ASSERT_RTNL(); + ASSERT_VPORT(); + BUG_ON(vport_get_dp_port(vport)); + + unregister_vport(vport); + + return vport->ops->destroy(vport); + } + + /** + * vport_attach - attach a vport to a datapath + * + * @vport: vport to attach. + * @dp_port: Datapath port to attach the vport to. + * + * Attaches a vport to a specific datapath so that packets may be exchanged. + * Both ports must be currently unattached. @dp_port must be successfully + * attached to a vport before it is connected to a datapath and must not be + * modified while connected. RTNL lock and the appropriate DP mutex must be held. + */ + int + vport_attach(struct vport *vport, struct dp_port *dp_port) + { + ASSERT_RTNL(); + + if (dp_port->vport) + return -EBUSY; + + if (vport_get_dp_port(vport)) + return -EBUSY; + + if (vport->ops->attach) { + int err; + + err = vport->ops->attach(vport); + if (err) + return err; + } + + dp_port->vport = vport; + rcu_assign_pointer(vport->dp_port, dp_port); + + return 0; + } + + /** + * vport_detach - detach a vport from a datapath + * + * @vport: vport to detach. + * + * Detaches a vport from a datapath. May fail for a variety of reasons, + * including lack of memory. RTNL lock and the appropriate DP mutex must be held. + */ + int + vport_detach(struct vport *vport) + { + struct dp_port *dp_port; + + ASSERT_RTNL(); + + dp_port = vport_get_dp_port(vport); + if (!dp_port) + return -EINVAL; + + dp_port->vport = NULL; + rcu_assign_pointer(vport->dp_port, NULL); + + if (vport->ops->detach) + return vport->ops->detach(vport); + else + return 0; + } + + /** + * vport_set_mtu - set device MTU (for kernel callers) + * + * @vport: vport on which to set MTU. + * @mtu: New MTU. + * + * Sets the MTU of the given device. Some devices may not support setting the + * MTU, in which case the result will always be -EOPNOTSUPP. RTNL lock must + * be held. 
+ */ + int + vport_set_mtu(struct vport *vport, int mtu) + { + ASSERT_RTNL(); + + if (mtu < 68) + return -EINVAL; + + if (vport->ops->set_mtu) + return vport->ops->set_mtu(vport, mtu); + else + return -EOPNOTSUPP; + } + + /** + * vport_set_addr - set device Ethernet address (for kernel callers) + * + * @vport: vport on which to set Ethernet address. + * @addr: New address. + * + * Sets the Ethernet address of the given device. Some devices may not support + * setting the Ethernet address, in which case the result will always be + * -EOPNOTSUPP. RTNL lock must be held. + */ + int + vport_set_addr(struct vport *vport, const unsigned char *addr) + { + ASSERT_RTNL(); + + if (!is_valid_ether_addr(addr)) + return -EADDRNOTAVAIL; + + if (vport->ops->set_addr) + return vport->ops->set_addr(vport, addr); + else + return -EOPNOTSUPP; + } + + /** + * vport_get_name - retrieve device name + * + * @vport: vport from which to retrieve the name. + * + * Retrieves the name of the given device. Either RTNL lock or rcu_read_lock + * must be held for the entire duration that the name is in use. + */ + const char * + vport_get_name(const struct vport *vport) + { + return vport->ops->get_name(vport); + } + + /** + * vport_get_type - retrieve device type + * + * @vport: vport from which to retrieve the type. + * + * Retrieves the type of the given device. Either RTNL lock or rcu_read_lock + * must be held for the entire duration that the type is in use. + */ + const char * + vport_get_type(const struct vport *vport) + { + return vport->ops->type; + } + + /** + * vport_get_addr - retrieve device Ethernet address (for kernel callers) + * + * @vport: vport from which to retrieve the Ethernet address. + * + * Retrieves the Ethernet address of the given device. Either RTNL lock or + * rcu_read_lock must be held for the entire duration that the Ethernet address + * is in use. 
+ */ + const unsigned char * + vport_get_addr(const struct vport *vport) + { + return vport->ops->get_addr(vport); + } + + /** + * vport_get_dp_port - retrieve attached datapath port + * + * @vport: vport from which to retrieve the datapath port. + * + * Retrieves the attached datapath port or null if not attached. Either RTNL + * lock or rcu_read_lock must be held for the entire duration that the datapath + * port is being accessed. + */ + struct dp_port * + vport_get_dp_port(const struct vport *vport) + { + return rcu_dereference(vport->dp_port); + } + + /** + * vport_get_kobj - retrieve associated kobj + * + * @vport: vport from which to retrieve the associated kobj + * + * Retrieves the associated kobj or null if no kobj. The returned kobj is + * valid for as long as the vport exists. + */ + struct kobject * + vport_get_kobj(const struct vport *vport) + { + if (vport->ops->get_kobj) + return vport->ops->get_kobj(vport); + else + return NULL; + } + + /** + * vport_get_flags - retrieve device flags + * + * @vport: vport from which to retrieve the flags + * + * Retrieves the flags of the given device. Either RTNL lock or rcu_read_lock + * must be held. + */ + unsigned + vport_get_flags(const struct vport *vport) + { + return vport->ops->get_dev_flags(vport); + } + + /** + * vport_get_flags - check whether device is running + * + * @vport: vport on which to check status. + * + * Checks whether the given device is running. Either RTNL lock or + * rcu_read_lock must be held. + */ + int + vport_is_running(const struct vport *vport) + { + return vport->ops->is_running(vport); + } + + /** + * vport_get_flags - retrieve device operating state + * + * @vport: vport from which to check status + * + * Retrieves the RFC2863 operstate of the given device. Either RTNL lock or + * rcu_read_lock must be held. 
+ */ + unsigned char + vport_get_operstate(const struct vport *vport) + { + return vport->ops->get_operstate(vport); + } + + /** + * vport_get_ifindex - retrieve device system interface index + * + * @vport: vport from which to retrieve index + * + * Retrieves the system interface index of the given device. Not all devices + * will have system indexes, in which case the index of the datapath local + * port is returned. Returns a negative index on error. Either RTNL lock or + * rcu_read_lock must be held. + */ + int + vport_get_ifindex(const struct vport *vport) + { + const struct dp_port *dp_port; + + if (vport->ops->get_ifindex) + return vport->ops->get_ifindex(vport); + + /* If we don't actually have an ifindex, use the local port's. + * Userspace doesn't check it anyways. */ + dp_port = vport_get_dp_port(vport); + if (!dp_port) + return -EAGAIN; + - return vport_get_ifindex(dp_port->dp->ports[ODPP_LOCAL]->vport); ++ return vport_get_ifindex(dp_port->dp->ports[XFLOWP_LOCAL]->vport); + } + + /** + * vport_get_iflink - retrieve device system link index + * + * @vport: vport from which to retrieve index + * + * Retrieves the system link index of the given device. The link is the index + * of the interface on which the packet will actually be sent. In most cases + * this is the same as the ifindex but may be different for tunnel devices. + * Returns a negative index on error. Either RTNL lock or rcu_read_lock must + * be held. + */ + int + vport_get_iflink(const struct vport *vport) + { + if (vport->ops->get_iflink) + return vport->ops->get_iflink(vport); + + /* If we don't have an iflink, use the ifindex. In most cases they + * are the same. */ + return vport_get_ifindex(vport); + } + + /** + * vport_get_mtu - retrieve device MTU (for kernel callers) + * + * @vport: vport from which to retrieve MTU + * + * Retrieves the MTU of the given device. Either RTNL lock or rcu_read_lock + * must be held. 
+ */ + int + vport_get_mtu(const struct vport *vport) + { + return vport->ops->get_mtu(vport); + } + + /** + * vport_receive - pass up received packet to the datapath for processing + * + * @vport: vport that received the packet + * @skb: skb that was received + * + * Must be called with rcu_read_lock. The packet cannot be shared and + * skb->data should point to the Ethernet header. The caller must have already + * called compute_ip_summed() to initialize the checksumming fields. + */ + void + vport_receive(struct vport *vport, struct sk_buff *skb) + { + struct dp_port *dp_port = vport_get_dp_port(vport); + + if (!dp_port) { + vport_record_error(vport, VPORT_E_RX_DROPPED); + kfree_skb(skb); + + return; + } + + if (vport->ops->flags & VPORT_F_GEN_STATS) { + struct vport_percpu_stats *stats; + + local_bh_disable(); + + stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + stats->rx_packets++; + stats->rx_bytes += skb->len; + + local_bh_enable(); + } + + if (!(vport->ops->flags & VPORT_F_TUN_ID)) + OVS_CB(skb)->tun_id = 0; + + dp_process_received_packet(dp_port, skb); + } + + /** + * vport_send - send a packet on a device + * + * @vport: vport on which to send the packet + * @skb: skb to send + * + * Sends the given packet and returns the length of data sent. Either RTNL + * lock or rcu_read_lock must be held. 
+ */ + int + vport_send(struct vport *vport, struct sk_buff *skb) + { + int sent; + + sent = vport->ops->send(vport, skb); + + if (vport->ops->flags & VPORT_F_GEN_STATS && sent > 0) { + struct vport_percpu_stats *stats; + + local_bh_disable(); + + stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + stats->tx_packets++; + stats->tx_bytes += sent; + + local_bh_enable(); + } + + return sent; + } + + /** + * vport_record_error - indicate device error to generic stats layer + * + * @vport: vport that encountered the error + * @err_type: one of enum vport_err_type types to indicate the error type + * + * If using the vport generic stats layer indicate that an error of the given + * type has occured. + */ + void + vport_record_error(struct vport *vport, enum vport_err_type err_type) + { + if (vport->ops->flags & VPORT_F_GEN_STATS) { + + spin_lock_bh(&vport->err_stats.lock); + + switch (err_type) { + case VPORT_E_RX_DROPPED: + vport->err_stats.rx_dropped++; + break; + + case VPORT_E_RX_ERROR: + vport->err_stats.rx_errors++; + break; + + case VPORT_E_RX_FRAME: + vport->err_stats.rx_frame_err++; + break; + + case VPORT_E_RX_OVER: + vport->err_stats.rx_over_err++; + break; + + case VPORT_E_RX_CRC: + vport->err_stats.rx_crc_err++; + break; + + case VPORT_E_TX_DROPPED: + vport->err_stats.tx_dropped++; + break; + + case VPORT_E_TX_ERROR: + vport->err_stats.tx_errors++; + break; + + case VPORT_E_COLLISION: + vport->err_stats.collisions++; + break; + }; + + spin_unlock_bh(&vport->err_stats.lock); + } + } + + /** + * vport_gen_ether_addr - generate an Ethernet address + * + * @addr: location to store generated address + * + * Generates a random Ethernet address for use when creating a device that + * has no natural address. + */ + void + vport_gen_ether_addr(u8 *addr) + { + random_ether_addr(addr); + + /* Set the OUI to the Nicira one. */ + addr[0] = 0x00; + addr[1] = 0x23; + addr[2] = 0x20; + + /* Set the top bit to indicate random address. 
*/ + addr[3] |= 0x80; + } diff --cc datapath/vport.h index 000000000,a26f232a7..be9a07b95 mode 000000,100644..100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@@ -1,0 -1,241 +1,241 @@@ + /* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + + #ifndef VPORT_H + #define VPORT_H 1 + + #include + #include + #include + + #include "datapath.h" -#include "openvswitch/datapath-protocol.h" -#include "odp-compat.h" ++#include "openvswitch/xflow.h" ++#include "xflow-compat.h" + + struct vport; + struct dp_port; + + /* The following definitions are for users of the vport subsytem: */ + + void vport_lock(void); + void vport_unlock(void); + + int vport_init(void); + void vport_exit(void); + -int vport_add(const struct odp_vport_add __user *); -int vport_mod(const struct odp_vport_mod __user *); ++int vport_add(const struct xflow_vport_add __user *); ++int vport_mod(const struct xflow_vport_mod __user *); + int vport_del(const char __user *udevname); + + #ifdef CONFIG_COMPAT -int compat_vport_add(struct compat_odp_vport_add __user *); -int compat_vport_mod(struct compat_odp_vport_mod __user *); ++int compat_vport_add(struct compat_xflow_vport_add __user *); ++int compat_vport_mod(struct compat_xflow_vport_mod __user *); + #endif + -int vport_stats_get(struct odp_vport_stats_req __user *); -int vport_ether_get(struct odp_vport_ether __user *); -int vport_ether_set(struct odp_vport_ether __user *); -int vport_mtu_get(struct odp_vport_mtu __user *); -int vport_mtu_set(struct odp_vport_mtu __user *); ++int vport_stats_get(struct xflow_vport_stats_req __user *); ++int vport_ether_get(struct xflow_vport_ether __user *); ++int vport_ether_set(struct xflow_vport_ether __user *); ++int vport_mtu_get(struct xflow_vport_mtu __user *); ++int vport_mtu_set(struct xflow_vport_mtu __user *); + + struct vport 
*__vport_add(const char *name, const char *type, const void __user *config); + int __vport_mod(struct vport *, const void __user *config); + int __vport_del(struct vport *); + + struct vport *vport_locate(const char *name); + + int vport_attach(struct vport *, struct dp_port *); + int vport_detach(struct vport *); + + int vport_set_mtu(struct vport *, int mtu); + int vport_set_addr(struct vport *, const unsigned char *); + + const char *vport_get_name(const struct vport *); + const char *vport_get_type(const struct vport *); + const unsigned char *vport_get_addr(const struct vport *); + + struct dp_port *vport_get_dp_port(const struct vport *); + + struct kobject *vport_get_kobj(const struct vport *); + + unsigned vport_get_flags(const struct vport *); + int vport_is_running(const struct vport *); + unsigned char vport_get_operstate(const struct vport *); + + int vport_get_ifindex(const struct vport *); + int vport_get_iflink(const struct vport *); + + int vport_get_mtu(const struct vport *); + + int vport_send(struct vport *, struct sk_buff *); + + /* The following definitions are for implementers of vport devices: */ + + struct vport_percpu_stats { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; + }; + + struct vport_err_stats { + spinlock_t lock; + + u64 rx_dropped; + u64 rx_errors; + u64 rx_frame_err; + u64 rx_over_err; + u64 rx_crc_err; + u64 tx_dropped; + u64 tx_errors; + u64 collisions; + }; + + struct vport { + struct hlist_node hash_node; + const struct vport_ops *ops; + struct dp_port *dp_port; + + struct vport_percpu_stats *percpu_stats; + struct vport_err_stats err_stats; + }; + + #define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */ + #define VPORT_F_GEN_STATS (1 << 1) /* Track stats at the generic layer. */ + #define VPORT_F_TUN_ID (1 << 2) /* Sets OVS_CB(skb)->tun_id. 
*/ + + /** + * struct vport_ops - definition of a type of virtual port + * + * @type: Name of port type, such as "netdev" or "internal" to be matched + * against the device type when a new port needs to be created. + * @flags: Flags of type VPORT_F_* that influence how the generic vport layer + * handles this vport. + * @init: Called at module initialization. If VPORT_F_REQUIRED is set then the + * failure of this function will cause the module to not load. If the flag is + * not set and initialization fails then no vports of this type can be created. + * @exit: Called at module unload. + * @create: Create a new vport called 'name' with vport type specific + * configuration 'config' (which must be copied from userspace before use). On + * success must allocate a new vport using vport_alloc(). + * @modify: Modify the configuration of an existing vport. May be null if + * modification is not supported. + * @destroy: Destroy and free a vport using vport_free(). Prior to destruction + * @detach will be called followed by synchronize_rcu(). + * @attach: Attach a previously created vport to a datapath. After attachment + * packets may be sent and received. Prior to attachment any packets may be + * silently discarded. May be null if not needed. + * @detach: Detach a vport from a datapath. May be null if not needed. + * @set_mtu: Set the device's MTU. May be null if not supported. + * @set_addr: Set the device's MAC address. May be null if not supported. + * @get_name: Get the device's name. + * @get_addr: Get the device's MAC address. + * @get_kobj: Get the kobj associated with the device (may return null). + * @get_stats: Fill in the transmit/receive stats. May be null if stats are + * not supported or if generic stats are in use. If defined overrides + * VPORT_F_GEN_STATS. + * @get_dev_flags: Get the device's flags. + * @is_running: Checks whether the device is running. + * @get_operstate: Get the device's operating state.
+ * @get_ifindex: Get the system interface index associated with the device. + * May be null if the device does not have an ifindex. + * @get_iflink: Get the system interface index associated with the device that + * will be used to send packets (may be different than ifindex for tunnels). + * May be null if the device does not have an iflink. + * @get_mtu: Get the device's MTU. + * @send: Send a packet on the device. Returns the length of the packet sent. + */ + struct vport_ops { + const char *type; + u32 flags; + + /* Called at module init and exit respectively. */ + int (*init)(void); + void (*exit)(void); + + /* Called with RTNL lock. */ + struct vport *(*create)(const char *name, const void __user *config); + int (*modify)(struct vport *, const void __user *config); + int (*destroy)(struct vport *); + + int (*attach)(struct vport *); + int (*detach)(struct vport *); + + int (*set_mtu)(struct vport *, int mtu); + int (*set_addr)(struct vport *, const unsigned char *); + + /* Called with rcu_read_lock or RTNL lock. 
*/ + const char *(*get_name)(const struct vport *); + const unsigned char *(*get_addr)(const struct vport *); + struct kobject *(*get_kobj)(const struct vport *); - int (*get_stats)(const struct vport *, struct odp_vport_stats *); ++ int (*get_stats)(const struct vport *, struct xflow_vport_stats *); + + unsigned (*get_dev_flags)(const struct vport *); + int (*is_running)(const struct vport *); + unsigned char (*get_operstate)(const struct vport *); + + int (*get_ifindex)(const struct vport *); + int (*get_iflink)(const struct vport *); + + int (*get_mtu)(const struct vport *); + + int (*send)(struct vport *, struct sk_buff *); + }; + + enum vport_err_type { + VPORT_E_RX_DROPPED, + VPORT_E_RX_ERROR, + VPORT_E_RX_FRAME, + VPORT_E_RX_OVER, + VPORT_E_RX_CRC, + VPORT_E_TX_DROPPED, + VPORT_E_TX_ERROR, + VPORT_E_COLLISION, + }; + + struct vport *vport_alloc(int priv_size, const struct vport_ops *); + void vport_free(struct vport *); + + #define VPORT_ALIGN 8 + + /** + * vport_priv - access private data area of vport + * + * @vport: vport to access + * + * If a nonzero size was passed in priv_size of vport_alloc() a private data + * area was allocated on creation. This allows that area to be accessed and + * used for any purpose needed by the vport implementer. + */ + static inline void * + vport_priv(const struct vport *vport) + { + return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); + } + + /** + * vport_from_priv - lookup vport from private data pointer + * + * @priv: Start of private data area. + * + * It is sometimes useful to translate from a pointer to the private data + * area to the vport, such as in the case where the private data pointer is + * the result of a hash table lookup. @priv must point to the start of the + * private data area. 
+ */ + static inline struct vport * + vport_from_priv(const void *priv) + { + return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); + } + + void vport_receive(struct vport *, struct sk_buff *); + void vport_record_error(struct vport *, enum vport_err_type err_type); + void vport_gen_ether_addr(u8 *addr); + + #endif /* vport.h */ diff --cc datapath/xflow-compat.h index 000000000,000000000..7cffe2735 new file mode 100644 --- /dev/null +++ b/datapath/xflow-compat.h @@@ -1,0 -1,0 +1,82 @@@ ++/* ++ * Copyright (c) 2010 Nicira Networks. ++ * Distributed under the terms of the GNU GPL version 2. ++ * ++ * Significant portions of this file may be copied from parts of the Linux ++ * kernel, by Linus Torvalds and others. ++ */ ++ ++#ifndef XFLOW_COMPAT_H ++#define XFLOW_COMPAT_H 1 ++ ++/* 32-bit ioctl compatibility definitions for datapath protocol. */ ++ ++#ifdef CONFIG_COMPAT ++#include "openvswitch/xflow.h" ++#include ++ ++#define XFLOW_PORT_LIST32 _IOWR('O', 10, struct compat_xflow_portvec) ++#define XFLOW_PORT_GROUP_SET32 _IOR('O', 11, struct compat_xflow_port_group) ++#define XFLOW_PORT_GROUP_GET32 _IOWR('O', 12, struct compat_xflow_port_group) ++#define XFLOW_FLOW_GET32 _IOWR('O', 13, struct compat_xflow_flow) ++#define XFLOW_FLOW_PUT32 _IOWR('O', 14, struct compat_xflow_flow) ++#define XFLOW_FLOW_LIST32 _IOWR('O', 15, struct compat_xflow_flowvec) ++#define XFLOW_FLOW_DEL32 _IOWR('O', 17, struct compat_xflow_flow) ++#define XFLOW_EXECUTE32 _IOR('O', 18, struct compat_xflow_execute) ++#define XFLOW_VPORT_ADD32 _IOR('O', 21, struct compat_xflow_vport_add) ++#define XFLOW_VPORT_MOD32 _IOR('O', 22, struct compat_xflow_vport_mod) ++ ++struct compat_xflow_portvec { ++ compat_uptr_t ports; ++ u32 n_ports; ++}; ++ ++struct compat_xflow_port_group { ++ compat_uptr_t ports; ++ u16 n_ports; /* Number of ports. */ ++ u16 group; /* Group number. 
*/ ++}; ++ ++struct compat_xflow_flow { ++ struct xflow_flow_stats stats; ++ struct xflow_key key; ++ compat_uptr_t actions; ++ u32 n_actions; ++ u32 flags; ++}; ++ ++struct compat_xflow_flow_put { ++ struct compat_xflow_flow flow; ++ u32 flags; ++}; ++ ++struct compat_xflow_flowvec { ++ compat_uptr_t flows; ++ u32 n_flows; ++}; ++ ++struct compat_xflow_execute { ++ u16 in_port; ++ u16 reserved1; ++ u32 reserved2; ++ ++ compat_uptr_t actions; ++ u32 n_actions; ++ ++ compat_uptr_t data; ++ u32 length; ++}; ++ ++struct compat_xflow_vport_add { ++ char port_type[VPORT_TYPE_SIZE]; ++ char devname[16]; /* IFNAMSIZ */ ++ compat_uptr_t config; ++}; ++ ++struct compat_xflow_vport_mod { ++ char devname[16]; /* IFNAMSIZ */ ++ compat_uptr_t config; ++}; ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* xflow-compat.h */ diff --cc include/openvswitch/automake.mk index 61859979f,3cc83d879..2bdf0444d --- a/include/openvswitch/automake.mk +++ b/include/openvswitch/automake.mk @@@ -1,5 -1,6 +1,6 @@@ noinst_HEADERS += \ include/openvswitch/gre.h \ include/openvswitch/brcompat-netlink.h \ + include/openvswitch/internal_dev.h \ - include/openvswitch/datapath-protocol.h + include/openvswitch/xflow.h diff --cc include/openvswitch/gre.h index 2b24cf6ef,3c5691b0e..396a67d9d --- a/include/openvswitch/gre.h +++ b/include/openvswitch/gre.h @@@ -1,5 -1,5 +1,5 @@@ /* -- * Copyright (c) 2008, 2009 Nicira Networks. ++ * Copyright (c) 2008, 2009, 2010 Nicira Networks. * * This file is offered under your choice of two licenses: Apache 2.0 or GNU * GPL 2.0 or later. 
The permission statements for each of these licenses is @@@ -40,36 -40,28 +40,24 @@@ #ifndef OPENVSWITCH_GRE_H #define OPENVSWITCH_GRE_H 1 - #include - #include -#ifdef __KERNEL__ + #include -#else -#include -#endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - #define GRE_IOCTL_ONLY - #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - enum - { - IFLA_GRE_UNSPEC, - IFLA_GRE_LINK, - IFLA_GRE_IFLAGS, - IFLA_GRE_OFLAGS, - IFLA_GRE_IKEY, - IFLA_GRE_OKEY, - IFLA_GRE_LOCAL, - IFLA_GRE_REMOTE, - IFLA_GRE_TTL, - IFLA_GRE_TOS, - IFLA_GRE_PMTUDISC, - __IFLA_GRE_MAX, - }; - - #define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) - #endif + #define GRE_F_IN_CSUM (1 << 0) /* Require incoming packets to have checksums. */ + #define GRE_F_OUT_CSUM (1 << 1) /* Checksum outgoing packets. */ + #define GRE_F_IN_KEY_MATCH (1 << 2) /* Store the key in tun_id to match in flow table. */ + #define GRE_F_OUT_KEY_ACTION (1 << 3) /* Get the key from a SET_TUNNEL action. */ + #define GRE_F_TOS_INHERIT (1 << 4) /* Inherit the ToS from the inner packet. */ + #define GRE_F_TTL_INHERIT (1 << 5) /* Inherit the TTL from the inner packet. */ + #define GRE_F_PMTUD (1 << 6) /* Enable path MTU discovery. */ - #define GRE_IOCTL_DEVICE "gre0" - - #define SIOCGETGRETAP SIOCGETTUNNEL - #define SIOCADDGRETAP (SIOCDEVPRIVATE + 10) - #define SIOCDELGRETAP SIOCDELTUNNEL - #define SIOCCHGGRETAP (SIOCDEVPRIVATE + 11) + struct gre_port_config { + __u32 flags; + __be32 saddr; + __be32 daddr; + __be32 in_key; + __be32 out_key; + __u8 tos; + __u8 ttl; + }; #endif /* openvswitch/gre.h */ diff --cc include/openvswitch/xflow.h index 8920a24d6,000000000..37a900a43 mode 100644,000000..100644 --- a/include/openvswitch/xflow.h +++ b/include/openvswitch/xflow.h @@@ -1,353 -1,0 +1,416 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * This file is offered under your choice of two licenses: Apache 2.0 or GNU + * GPL 2.0 or later. The permission statements for each of these licenses is + * given below. 
You may license your modifications to this file under either + * of these licenses or both. If you wish to license your modifications under + * only one of these licenses, delete the permission text for the other + * license. + * + * ---------------------------------------------------------------------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ---------------------------------------------------------------------- + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * ---------------------------------------------------------------------- + */ + - /* Protocol between userspace and kernel datapath. */ ++/* Protocol between userspace and kernel datapath. ++ * ++ * Be sure to update datapath/xflow-compat.h if you change any of the ++ * structures in here. 
*/ + +#ifndef XFLOW_H +#define XFLOW_H 1 + +#ifdef __KERNEL__ +#include +#else +#include +#endif +#include + +#define XFLOW_MAX 256 /* Maximum number of datapaths. */ + +#define XFLOW_DP_CREATE _IO('O', 0) +#define XFLOW_DP_DESTROY _IO('O', 1) +#define XFLOW_DP_STATS _IOW('O', 2, struct xflow_stats) + +#define XFLOW_GET_DROP_FRAGS _IOW('O', 3, int) +#define XFLOW_SET_DROP_FRAGS _IOR('O', 4, int) + +#define XFLOW_GET_LISTEN_MASK _IOW('O', 5, int) +#define XFLOW_SET_LISTEN_MASK _IOR('O', 6, int) + - #define XFLOW_PORT_ADD _IOR('O', 7, struct xflow_port) - #define XFLOW_PORT_DEL _IOR('O', 8, int) ++#define XFLOW_PORT_ATTACH _IOR('O', 7, struct xflow_port) ++#define XFLOW_PORT_DETACH _IOR('O', 8, int) +#define XFLOW_PORT_QUERY _IOWR('O', 9, struct xflow_port) +#define XFLOW_PORT_LIST _IOWR('O', 10, struct xflow_portvec) + +#define XFLOW_PORT_GROUP_SET _IOR('O', 11, struct xflow_port_group) +#define XFLOW_PORT_GROUP_GET _IOWR('O', 12, struct xflow_port_group) + +#define XFLOW_FLOW_GET _IOWR('O', 13, struct xflow_flow) +#define XFLOW_FLOW_PUT _IOWR('O', 14, struct xflow_flow) +#define XFLOW_FLOW_LIST _IOWR('O', 15, struct xflow_flowvec) +#define XFLOW_FLOW_FLUSH _IO('O', 16) +#define XFLOW_FLOW_DEL _IOWR('O', 17, struct xflow_flow) + +#define XFLOW_EXECUTE _IOR('O', 18, struct xflow_execute) + +#define XFLOW_SET_SFLOW_PROBABILITY _IOR('O', 19, int) +#define XFLOW_GET_SFLOW_PROBABILITY _IOW('O', 20, int) + ++#define XFLOW_VPORT_ADD _IOR('O', 21, struct xflow_vport_add) ++#define XFLOW_VPORT_MOD _IOR('O', 22, struct xflow_vport_mod) ++#define XFLOW_VPORT_DEL _IO('O', 23) ++#define XFLOW_VPORT_STATS_GET _IOWR('O', 24, struct xflow_vport_stats_req) ++#define XFLOW_VPORT_ETHER_GET _IOWR('O', 25, struct xflow_vport_ether) ++#define XFLOW_VPORT_ETHER_SET _IOW('O', 26, struct xflow_vport_ether) ++#define XFLOW_VPORT_MTU_GET _IOWR('O', 27, struct xflow_vport_mtu) ++#define XFLOW_VPORT_MTU_SET _IOW('O', 28, struct xflow_vport_mtu) ++ +struct xflow_stats { + /* Flows. 
*/ + __u32 n_flows; /* Number of flows in flow table. */ + __u32 cur_capacity; /* Current flow table capacity. */ + __u32 max_capacity; /* Maximum expansion of flow table capacity. */ + + /* Ports. */ + __u32 n_ports; /* Current number of ports. */ + __u32 max_ports; /* Maximum supported number of ports. */ + __u16 max_groups; /* Maximum number of port groups. */ + __u16 reserved; + + /* Lookups. */ + __u64 n_frags; /* Number of dropped IP fragments. */ + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ + + /* Queues. */ + __u16 max_miss_queue; /* Max length of XFLOWL_MISS queue. */ + __u16 max_action_queue; /* Max length of XFLOWL_ACTION queue. */ + __u16 max_sflow_queue; /* Max length of XFLOWL_SFLOW queue. */ +}; + +/* Logical ports. */ +#define XFLOWP_LOCAL ((__u16)0) +#define XFLOWP_NONE ((__u16)-1) +#define XFLOWP_NORMAL ((__u16)-2) + +/* Listening channels. */ +#define _XFLOWL_MISS_NR 0 /* Packet missed in flow table. */ +#define XFLOWL_MISS (1 << _XFLOWL_MISS_NR) +#define _XFLOWL_ACTION_NR 1 /* Packet output to XFLOWP_CONTROLLER. */ +#define XFLOWL_ACTION (1 << _XFLOWL_ACTION_NR) +#define _XFLOWL_SFLOW_NR 2 /* sFlow samples. */ +#define XFLOWL_SFLOW (1 << _XFLOWL_SFLOW_NR) +#define XFLOWL_ALL (XFLOWL_MISS | XFLOWL_ACTION | XFLOWL_SFLOW) + +/** + * struct xflow_msg - format of messages read from datapath fd. + * @type: One of the %_XFLOWL_* constants. + * @length: Total length of message, including this header. + * @port: Port that received the packet embedded in this message. + * @reserved: Not currently used. Should be set to 0. + * @arg: Argument value whose meaning depends on @type. + * + * For @type == %_XFLOWL_MISS_NR, the header is followed by packet data. The - * @arg member is unused and set to 0. - * ++ * @arg member is the ID (in network byte order) of the tunnel that ++ * encapsulated this packet. 
It is 0 if the packet was not received on a tunnel. * + * For @type == %_XFLOWL_ACTION_NR, the header is followed by packet data. The + * @arg member is copied from the &struct xflow_action_controller that caused + * the &struct xflow_msg to be composed. + * + * For @type == %_XFLOWL_SFLOW_NR, the header is followed by &struct + * xflow_sflow_sample_header, then by an array of &union xflow_action (the + * number of which is specified in &struct xflow_sflow_sample_header), then by + * packet data. + */ +struct xflow_msg { + __u32 type; + __u32 length; + __u16 port; + __u16 reserved; + __u32 arg; +}; + +/** + * struct xflow_sflow_sample_header - header added to sFlow sampled packet. + * @sample_pool: Number of packets that were candidates for sFlow sampling, + * regardless of whether they were actually chosen and sent down to userspace. + * @n_actions: Number of "union xflow_action"s immediately following this + * header. + * + * This header follows &struct xflow_msg when that structure's @type is + * %_XFLOWL_SFLOW_NR, and it is itself followed by an array of &union + * xflow_action (the number of which is specified in @n_actions) and then by + * packet data. + */ +struct xflow_sflow_sample_header { + __u32 sample_pool; + __u32 n_actions; +}; + +#define XFLOW_PORT_INTERNAL (1 << 0) /* This port is simulated. */ +struct xflow_port { + char devname[16]; /* IFNAMSIZ */ + __u16 port; + __u16 flags; + __u32 reserved2; +}; + +struct xflow_portvec { + struct xflow_port *ports; - int n_ports; ++ __u32 n_ports; +}; + +struct xflow_port_group { + __u16 *ports; + __u16 n_ports; /* Number of ports. */ + __u16 group; /* Group number. */ +}; + +struct xflow_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ + __u64 used_sec; /* Time last used. */ + __u32 used_nsec; + __u8 tcp_flags; + __u8 ip_tos; + __u16 error; /* Used by XFLOW_FLOW_GET. 
*/ +}; + +/* + * The datapath protocol adopts the Linux convention for TCI fields: if an + * 802.1Q header is present then its TCI value is used verbatim except that the + * CFI bit (0x1000) is always set to 1, and all-bits-zero indicates no 802.1Q + * header. + */ +#define XFLOW_TCI_PRESENT 0x1000 /* CFI bit */ + +struct xflow_key { ++ __be32 tun_id; /* Encapsulating tunnel ID. */ + __be32 nw_src; /* IP source address. */ + __be32 nw_dst; /* IP destination address. */ + __u16 in_port; /* Input switch port. */ + __be16 dl_tci; /* All zeros if 802.1Q header absent, + * XFLOW_TCI_PRESENT set if present. */ + __be16 dl_type; /* Ethernet frame type. */ + __be16 tp_src; /* TCP/UDP source port. */ + __be16 tp_dst; /* TCP/UDP destination port. */ + __u8 dl_src[ETH_ALEN]; /* Ethernet source address. */ + __u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */ + __u8 nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ + __u8 nw_tos; /* IP ToS (DSCP field, 6 bits). */ +}; + +/* Flags for XFLOW_FLOW. */ +#define XFLOWFF_ZERO_TCP_FLAGS (1 << 0) /* Zero the TCP flags. */ + +struct xflow_flow { + struct xflow_flow_stats stats; + struct xflow_key key; + union xflow_action *actions; + __u32 n_actions; + __u32 flags; +}; + +/* Flags for XFLOW_FLOW_PUT. */ +#define XFLOWPF_CREATE (1 << 0) /* Allow creating a new flow. */ +#define XFLOWPF_MODIFY (1 << 1) /* Allow modifying an existing flow. */ +#define XFLOWPF_ZERO_STATS (1 << 2) /* Zero the stats of existing flow. */ + +/* XFLOW_FLOW_PUT argument. */ +struct xflow_flow_put { + struct xflow_flow flow; + __u32 flags; +}; + +struct xflow_flowvec { + struct xflow_flow *flows; - int n_flows; ++ __u32 n_flows; +}; + +/* Action types. */ +#define XFLOWAT_OUTPUT 0 /* Output to switch port. */ +#define XFLOWAT_OUTPUT_GROUP 1 /* Output to all ports in group. */ +#define XFLOWAT_CONTROLLER 2 /* Send copy to controller. */ +#define XFLOWAT_SET_DL_TCI 3 /* Set the 802.1q VLAN VID and/or PCP. 
*/ +#define XFLOWAT_STRIP_VLAN 4 /* Strip the 802.1q header. */ +#define XFLOWAT_SET_DL_SRC 5 /* Ethernet source address. */ +#define XFLOWAT_SET_DL_DST 6 /* Ethernet destination address. */ +#define XFLOWAT_SET_NW_SRC 7 /* IP source address. */ +#define XFLOWAT_SET_NW_DST 8 /* IP destination address. */ +#define XFLOWAT_SET_NW_TOS 9 /* IP ToS/DSCP field (6 bits). */ +#define XFLOWAT_SET_TP_SRC 10 /* TCP/UDP source port. */ +#define XFLOWAT_SET_TP_DST 11 /* TCP/UDP destination port. */ - #define XFLOWAT_N_ACTIONS 12 ++#define XFLOWAT_SET_TUNNEL 12 /* Set the encapsulating tunnel ID. */ ++#define XFLOWAT_N_ACTIONS 13 + +struct xflow_action_output { + __u16 type; /* XFLOWAT_OUTPUT. */ + __u16 port; /* Output port. */ + __u16 reserved1; + __u16 reserved2; +}; + +struct xflow_action_output_group { + __u16 type; /* XFLOWAT_OUTPUT_GROUP. */ + __u16 group; /* Group number. */ + __u16 reserved1; + __u16 reserved2; +}; + +struct xflow_action_controller { + __u16 type; /* XFLOWAT_CONTROLLER. */ + __u16 reserved; + __u32 arg; /* Copied to struct xflow_msg 'arg' member. */ +}; + ++struct xflow_action_tunnel { ++ __u16 type; /* XFLOWAT_SET_TUNNEL. */ ++ __u16 reserved; ++ __be32 tun_id; /* Tunnel ID. */ ++}; ++ +/* Action structure for XFLOWAT_SET_DL_TCI. */ +struct xflow_action_dl_tci { + __u16 type; /* XFLOWAT_SET_DL_TCI. */ + __be16 tci; /* New TCI. Bits not in mask must be zero. */ + __be16 mask; /* 0x0fff to set VID, 0xe000 to set PCP, + or 0xefff to set both. */ + __u16 reserved; +}; + +/* Action structure for XFLOWAT_SET_DL_SRC/DST. */ +struct xflow_action_dl_addr { + __u16 type; /* XFLOWAT_SET_DL_SRC/DST. */ + __u8 dl_addr[ETH_ALEN]; /* Ethernet address. */ +}; + +/* Action structure for XFLOWAT_SET_NW_SRC/DST. */ +struct xflow_action_nw_addr { + __u16 type; /* XFLOWAT_SET_NW_SRC/DST. */ + __u16 reserved; + __be32 nw_addr; /* IP address. */ +}; + +struct xflow_action_nw_tos { + __u16 type; /* XFLOWAT_SET_NW_TOS. */ + __u8 nw_tos; /* IP ToS/DSCP field (6 bits). 
*/ + __u8 reserved1; + __u16 reserved2; + __u16 reserved3; +}; + +/* Action structure for XFLOWAT_SET_TP_SRC/DST. */ +struct xflow_action_tp_port { + __u16 type; /* XFLOWAT_SET_TP_SRC/DST. */ + __be16 tp_port; /* TCP/UDP port. */ + __u16 reserved1; + __u16 reserved2; +}; + +union xflow_action { + __u16 type; + struct xflow_action_output output; + struct xflow_action_output_group output_group; + struct xflow_action_controller controller; ++ struct xflow_action_tunnel tunnel; + struct xflow_action_dl_tci dl_tci; + struct xflow_action_dl_addr dl_addr; + struct xflow_action_nw_addr nw_addr; + struct xflow_action_nw_tos nw_tos; + struct xflow_action_tp_port tp_port; +}; + +struct xflow_execute { + __u16 in_port; + __u16 reserved1; + __u32 reserved2; + + union xflow_action *actions; + __u32 n_actions; + + const void *data; + __u32 length; +}; + ++#define VPORT_TYPE_SIZE 16 ++struct xflow_vport_add { ++ char port_type[VPORT_TYPE_SIZE]; ++ char devname[16]; /* IFNAMSIZ */ ++ void *config; ++}; ++ ++struct xflow_vport_mod { ++ char devname[16]; /* IFNAMSIZ */ ++ void *config; ++}; ++ ++struct xflow_vport_stats { ++ __u64 rx_packets; ++ __u64 tx_packets; ++ __u64 rx_bytes; ++ __u64 tx_bytes; ++ __u64 rx_dropped; ++ __u64 tx_dropped; ++ __u64 rx_errors; ++ __u64 tx_errors; ++ __u64 rx_frame_err; ++ __u64 rx_over_err; ++ __u64 rx_crc_err; ++ __u64 collisions; ++}; ++ ++struct xflow_vport_stats_req { ++ char devname[16]; /* IFNAMSIZ */ ++ struct xflow_vport_stats stats; ++}; ++ ++struct xflow_vport_ether { ++ char devname[16]; /* IFNAMSIZ */ ++ unsigned char ether_addr[ETH_ALEN]; ++}; ++ ++struct xflow_vport_mtu { ++ char devname[16]; /* IFNAMSIZ */ ++ __u16 mtu; ++}; ++ +/* Values below this cutoff are 802.3 packets and the two bytes + * following MAC addresses are used as a frame length. Otherwise, the + * two bytes are used as the Ethernet type. 
+ */ +#define XFLOW_DL_TYPE_ETH2_CUTOFF 0x0600 + +/* Value of dl_type to indicate that the frame does not include an + * Ethernet type. + */ +#define XFLOW_DL_TYPE_NOT_ETH_TYPE 0x05ff + +#endif /* openvswitch/xflow.h */ diff --cc lib/automake.mk index 27a8c1825,56675384d..a6c9f61c5 --- a/lib/automake.mk +++ b/lib/automake.mk @@@ -195,24 -196,26 +196,25 @@@ EXTRA_DIST += lib/dhparams.h EXTRA_DIST += \ - lib/common.man \ lib/common-syn.man \ - lib/daemon.man \ + lib/common.man \ lib/daemon-syn.man \ - lib/dpif.man \ + lib/daemon.man \ lib/leak-checker.man \ - lib/ssl-bootstrap.man \ lib/ssl-bootstrap-syn.man \ + lib/ssl-bootstrap.man \ lib/ssl-peer-ca-cert.man \ - lib/ssl.man \ lib/ssl-syn.man \ + lib/ssl.man \ + lib/unixctl.man \ + lib/unixctl-syn.man \ lib/vconn-active.man \ lib/vconn-passive.man \ - lib/vlog-unixctl.man \ lib/vlog-syn.man \ - lib/vlog.man - + lib/vlog-unixctl.man \ + lib/vlog.man \ + lib/xfif.man - lib/dirs.c: Makefile ($(ro_c) && \ echo 'const char ovs_pkgdatadir[] = "$(pkgdatadir)";' && \ diff --cc lib/classifier.c index dc2e60785,4bd894216..1aadfe5c1 --- a/lib/classifier.c +++ b/lib/classifier.c @@@ -52,30 -52,32 +52,33 @@@ static bool rules_match_1wild(const str static bool rules_match_2wild(const struct cls_rule *wild1, const struct cls_rule *wild2, int field_idx); --/* Converts the flow in 'flow' into a cls_rule in 'rule', with the given -- * 'wildcards' and 'priority'.*/ ++/* Converts the flow in 'flow' into a cls_rule in 'rule'. 
*/ void - cls_rule_from_flow(struct cls_rule *rule, const flow_t *flow) -cls_rule_from_flow(const flow_t *flow, uint32_t wildcards, - unsigned int priority, struct cls_rule *rule) ++cls_rule_from_flow(const flow_t *flow, struct cls_rule *rule) { - assert(!flow->reserved[0] && !flow->reserved[1] && !flow->reserved[2]); rule->flow = *flow; - flow_wildcards_init(&rule->wc, wildcards); - rule->priority = priority; - rule->table_idx = table_idx_from_wildcards(rule->wc.wildcards); + if (!rule->flow.wildcards && rule->flow.priority < UINT16_MAX) { + rule->flow.priority = UINT16_MAX; + } + flow_wildcards_init(&rule->wc, flow->wildcards); + rule->table_idx = table_idx_from_wildcards(flow->wildcards); } /* Converts the ofp_match in 'match' into a cls_rule in 'rule', with the given - * 'priority'. */ + * 'priority'. If 'tun_id_from_cookie' is set then the upper 32 bits of + * 'cookie' are stored in the rule as the tunnel ID. */ void - cls_rule_from_match(struct cls_rule *rule, unsigned int priority, - const struct ofp_match *match) + cls_rule_from_match(const struct ofp_match *match, unsigned int priority, + bool tun_id_from_cookie, uint64_t cookie, + struct cls_rule *rule) { - flow_from_match(&rule->flow, - rule->flow.wildcards ? priority : UINT16_MAX, - match); - uint32_t wildcards; - flow_from_match(match, tun_id_from_cookie, cookie, &rule->flow, &wildcards); - flow_wildcards_init(&rule->wc, wildcards); - rule->priority = rule->wc.wildcards ? priority : UINT16_MAX; - rule->table_idx = table_idx_from_wildcards(rule->wc.wildcards); ++ /* Adjust the priority only once flow_from_match() has set the wildcards. */ ++ flow_from_match(match, priority, tun_id_from_cookie, cookie, &rule->flow); ++ if (!rule->flow.wildcards) { ++ rule->flow.priority = UINT16_MAX; ++ } + flow_wildcards_init(&rule->wc, rule->flow.wildcards); + rule->table_idx = table_idx_from_wildcards(rule->flow.wildcards); } /* Converts 'rule' to a string and returns the string. 
The caller must free @@@ -314,10 -307,10 +314,10 @@@ classifier_lookup_wild(const struct cla struct cls_rule target; int i; - cls_rule_from_flow(&target, flow); - cls_rule_from_flow(flow, 0, 0, &target); ++ cls_rule_from_flow(flow, &target); for (i = 0; i < CLS_N_FIELDS; i++) { struct cls_rule *rule = search_table(&cls->tables[i], i, &target); - if (rule && (!best || rule->priority > best->priority)) { + if (rule && (!best || rule->flow.priority > best->flow.priority)) { best = rule; } } @@@ -338,8 -332,8 +338,8 @@@ classifier_find_rule_exactly(const stru return search_exact_table(cls, flow_hash(target, 0), target); } - assert(target->wildcards == (target->wildcards & OFPFW_ALL)); - assert(wildcards == (wildcards & OVSFW_ALL)); - table_idx = table_idx_from_wildcards(wildcards); ++ assert(target->wildcards == (target->wildcards & OVSFW_ALL)); + table_idx = table_idx_from_wildcards(target->wildcards); hash = hash_fields(target, table_idx); HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash, &cls->tables[table_idx]) { @@@ -373,7 -369,7 +373,7 @@@ classifier_rule_overlaps(const struct c true : false; } - cls_rule_from_flow(&target_rule, target); - cls_rule_from_flow(target, wildcards, priority, &target_rule); ++ cls_rule_from_flow(target, &target_rule); for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) { struct cls_bucket *bucket; @@@ -403,12 -399,9 +403,12 @@@ * wildcards and an exact-match rule will never be in the same table. 
*/ void classifier_for_each_match(const struct classifier *cls, - const struct cls_rule *target, + const flow_t *target_flow, int include, cls_cb_func *callback, void *aux) { + struct cls_rule target; + - cls_rule_from_flow(&target, target_flow); ++ cls_rule_from_flow(target_flow, &target); if (include & CLS_INC_WILD) { const struct hmap *table; diff --cc lib/classifier.h index 53dfc88c7,35516022b..b515bf5b9 --- a/lib/classifier.h +++ b/lib/classifier.h @@@ -44,14 -44,11 +44,15 @@@ #include "flow.h" #include "hmap.h" #include "list.h" + #include "openflow/nicira-ext.h" #include "openflow/openflow.h" +#ifdef __cplusplus +extern "C" { +#endif + /* Number of bytes of fields in a rule. */ - #define CLS_N_BYTES 31 + #define CLS_N_BYTES 37 /* Fields in a rule. * @@@ -124,9 -122,12 +126,10 @@@ struct cls_rule unsigned int table_idx; /* Index into struct classifier 'tables'. */ }; - void cls_rule_from_flow(struct cls_rule *, const flow_t *); - void cls_rule_from_match(struct cls_rule *, unsigned int priority, - const struct ofp_match *); -void cls_rule_from_flow(const flow_t *, uint32_t wildcards, - unsigned int priority, struct cls_rule *); ++void cls_rule_from_flow(const flow_t *, struct cls_rule *); + void cls_rule_from_match(const struct ofp_match *, unsigned int priority, + bool tun_id_from_cookie, uint64_t cookie, + struct cls_rule *); char *cls_rule_to_string(const struct cls_rule *); void cls_rule_print(const struct cls_rule *); void cls_rule_moved(struct classifier *, diff --cc lib/flow.c index 9437d3f1e,7ae90d2b5..bd500857e --- a/lib/flow.c +++ b/lib/flow.c @@@ -89,9 -89,12 +89,12 @@@ pull_vlan(struct ofpbuf *packet return ofpbuf_try_pull(packet, VLAN_HEADER_LEN); } - /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. */ + /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. + * 'tun_id' is in network byte order, while 'in_port' is in host byte order. - * These byte orders are the same as they are in struct odp_flow_key. 
*/ ++ * These byte orders are the same as they are in struct xflow_key. */ int - flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow) + flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port, + flow_t *flow) { struct ofpbuf b = *packet; struct eth_header *eth; @@@ -239,13 -243,18 +243,15 @@@ flow_extract_stats(const flow_t *flow, stats->n_packets = 1; } --/* Extract 'flow' with 'wildcards' into the OpenFlow match structure -- * 'match'. */ ++/* Extract 'flow' into the OpenFlow match structure 'match'. */ void - flow_to_match(const flow_t *flow, struct ofp_match *match) -flow_to_match(const flow_t *flow, uint32_t wildcards, bool tun_id_from_cookie, ++flow_to_match(const flow_t *flow, bool tun_id_from_cookie, + struct ofp_match *match) { - match->wildcards = htonl(flow->wildcards); - if (!tun_id_from_cookie) { - wildcards &= OFPFW_ALL; - } - match->wildcards = htonl(wildcards); ++ uint32_t wildcard_mask = tun_id_from_cookie ? OVSFW_ALL : OFPFW_ALL; ++ match->wildcards = htonl(flow->wildcards & wildcard_mask); + - match->in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL + match->in_port = htons(flow->in_port == XFLOWP_LOCAL ? 
OFPP_LOCAL : flow->in_port); match->dl_vlan = flow->dl_vlan; match->dl_vlan_pcp = flow->dl_vlan_pcp; @@@ -263,14 -272,20 +269,20 @@@ } void - flow_from_match(flow_t *flow, unsigned int priority, - const struct ofp_match *match) -flow_from_match(const struct ofp_match *match, bool tun_id_from_cookie, - uint64_t cookie, flow_t *flow, uint32_t *flow_wildcards) ++flow_from_match(const struct ofp_match *match, uint32_t priority, ++ bool tun_id_from_cookie, uint64_t cookie, flow_t *flow) { - uint32_t wildcards = ntohl(match->wildcards); - + flow->wildcards = ntohl(match->wildcards); + flow->priority = priority; flow->nw_src = match->nw_src; flow->nw_dst = match->nw_dst; - if (tun_id_from_cookie && !(wildcards & NXFW_TUN_ID)) { ++ if (tun_id_from_cookie && !(flow->wildcards & NXFW_TUN_ID)) { + flow->tun_id = htonl(ntohll(cookie) >> 32); + } else { - wildcards |= NXFW_TUN_ID; ++ flow->wildcards |= NXFW_TUN_ID; + flow->tun_id = 0; + } - flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? ODPP_LOCAL + flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? 
XFLOWP_LOCAL : ntohs(match->in_port)); flow->dl_vlan = match->dl_vlan; flow->dl_vlan_pcp = match->dl_vlan_pcp; @@@ -294,16 -314,27 +306,30 @@@ flow_to_string(const flow_t *flow void flow_format(struct ds *ds, const flow_t *flow) { - ds_put_format(ds, "tunnel%08"PRIx32":in_port%04"PRIx16 + ds_put_format(ds, "wild%08"PRIx32" pri%"PRIu32" " - "in_port%04x:vlan%d:pcp%d mac"ETH_ADDR_FMT - "->"ETH_ADDR_FMT" type%04x proto%"PRId8" tos%"PRIu8 - " ip"IP_FMT"->"IP_FMT" port%d->%d", - flow->wildcards, flow->priority, - flow->in_port, ntohs(flow->dl_vlan), flow->dl_vlan_pcp, - ETH_ADDR_ARGS(flow->dl_src), ETH_ADDR_ARGS(flow->dl_dst), - ntohs(flow->dl_type), flow->nw_proto, flow->nw_tos, - IP_ARGS(&flow->nw_src), IP_ARGS(&flow->nw_dst), - ntohs(flow->tp_src), ntohs(flow->tp_dst)); ++ "tunnel%08"PRIx32":in_port%04"PRIx16 + ":vlan%"PRIu16":pcp%"PRIu8 + " mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT + " type%04"PRIx16 + " proto%"PRIu8 + " tos%"PRIu8 + " ip"IP_FMT"->"IP_FMT + " port%"PRIu16"->%"PRIu16, ++ flow->wildcards, ++ flow->priority, + ntohl(flow->tun_id), + flow->in_port, + ntohs(flow->dl_vlan), + flow->dl_vlan_pcp, + ETH_ADDR_ARGS(flow->dl_src), + ETH_ADDR_ARGS(flow->dl_dst), + ntohs(flow->dl_type), + flow->nw_proto, + flow->nw_tos, + IP_ARGS(&flow->nw_src), + IP_ARGS(&flow->nw_dst), + ntohs(flow->tp_src), + ntohs(flow->tp_dst)); } void diff --cc lib/flow.h index 3a6ae4cca,058404c87..0cfe9e3b7 --- a/lib/flow.h +++ b/lib/flow.h @@@ -21,47 -21,25 +21,50 @@@ #include #include #include - #include "hash.h" + #include "openflow/nicira-ext.h" #include "openflow/openflow.h" + #include "hash.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "util.h" struct ds; struct ofp_match; struct ofpbuf; -typedef struct odp_flow_key flow_t; +typedef struct flow flow_t; +struct flow { + uint32_t wildcards; /* Wildcards. */ + uint32_t priority; /* Priority. */ ++ uint32_t tun_id; /* Encapsulating tunnel ID. */ + uint32_t nw_src; /* IP source address. 
*/ + uint32_t nw_dst; /* IP destination address. */ + uint16_t in_port; /* Input switch port. */ + uint16_t dl_vlan; /* Input VLAN. */ + uint16_t dl_type; /* Ethernet frame type. */ + uint16_t tp_src; /* TCP/UDP source port. */ + uint16_t tp_dst; /* TCP/UDP destination port. */ + uint8_t dl_src[ETH_ALEN]; /* Ethernet source address. */ + uint8_t dl_dst[ETH_ALEN]; /* Ethernet destination address. */ + uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ + uint8_t dl_vlan_pcp; /* Input VLAN priority. */ + uint8_t nw_tos; /* IP ToS (DSCP field, 6 bits). */ +}; + +/* Assert that there are FLOW_SIG_SIZE bytes of significant data in "struct + * flow", followed by FLOW_PAD_SIZE bytes of padding. */ - #define FLOW_SIG_SIZE 41 ++#define FLOW_SIG_SIZE 45 +#define FLOW_PAD_SIZE 3 +BUILD_ASSERT_DECL(offsetof(struct flow, nw_tos) == FLOW_SIG_SIZE - 1); +BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->nw_tos) == 1); +BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE); - int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *); + int flow_extract(struct ofpbuf *, uint32_t tun_id, uint16_t in_port, flow_t *); void flow_extract_stats(const flow_t *flow, struct ofpbuf *packet, - struct odp_flow_stats *stats); -void flow_to_match(const flow_t *, uint32_t wildcards, bool tun_id_cookie, - struct ofp_match *); -void flow_from_match(const struct ofp_match *, bool tun_id_from_cookie, - uint64_t cookie, flow_t *, uint32_t *wildcards); + struct xflow_flow_stats *stats); - void flow_to_match(const flow_t *, struct ofp_match *); - void flow_from_match(flow_t *, unsigned int priority, - const struct ofp_match *); ++void flow_to_match(const flow_t *, ++ bool tun_id_from_cookie, struct ofp_match *); ++void flow_from_match(const struct ofp_match *, uint32_t priority, ++ bool tun_id_from_cookie, uint64_t cookie, flow_t *); char *flow_to_string(const flow_t *); void flow_format(struct ds *, const flow_t *); void flow_print(FILE *, const flow_t *); @@@ -115,9 -96,9 
+118,9 @@@ flow_nw_bits_to_mask(uint32_t wildcards static inline void flow_wildcards_init(struct flow_wildcards *wc, uint32_t wildcards) { - wildcards &= OFPFW_ALL; - wc->wildcards = wildcards & OVSFW_ALL; - wc->nw_src_mask = flow_nw_bits_to_mask(wc->wildcards, OFPFW_NW_SRC_SHIFT); - wc->nw_dst_mask = flow_nw_bits_to_mask(wc->wildcards, OFPFW_NW_DST_SHIFT); ++ wildcards &= OVSFW_ALL; + wc->nw_src_mask = flow_nw_bits_to_mask(wildcards, OFPFW_NW_SRC_SHIFT); + wc->nw_dst_mask = flow_nw_bits_to_mask(wildcards, OFPFW_NW_DST_SHIFT); } #endif /* flow.h */ diff --cc lib/netdev-gre.c index 000000000,4e28fee0c..39a51f7c8 mode 000000,100644..100644 --- a/lib/netdev-gre.c +++ b/lib/netdev-gre.c @@@ -1,0 -1,477 +1,477 @@@ + /* + * Copyright (c) 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + #include + #include + #include + #include + #include + + #include "list.h" + #include "netdev-provider.h" + #include "openflow/openflow.h" -#include "openvswitch/datapath-protocol.h" + #include "openvswitch/gre.h" ++#include "openvswitch/xflow.h" + #include "packets.h" + #include "shash.h" + #include "socket-util.h" + + #define THIS_MODULE VLM_netdev_gre + #include "vlog.h" + + struct netdev_dev_gre { + struct netdev_dev netdev_dev; + }; + + struct netdev_gre { + struct netdev netdev; + }; + + struct netdev_gre_notifier { + struct netdev_notifier notifier; + struct list node; + }; + + static int ioctl_fd = -1; + static struct shash netdev_gre_notifiers = + SHASH_INITIALIZER(&netdev_gre_notifiers); + + static void poll_notify(const struct netdev_gre *netdev); + + static struct netdev_dev_gre * + netdev_dev_gre_cast(const struct netdev_dev *netdev_dev) + { + netdev_dev_assert_class(netdev_dev, &netdev_gre_class); + return CONTAINER_OF(netdev_dev, struct netdev_dev_gre, netdev_dev); + } + + static struct netdev_gre * + netdev_gre_cast(const struct netdev *netdev) + { + netdev_assert_class(netdev, &netdev_gre_class); + return CONTAINER_OF(netdev, struct netdev_gre, netdev); + } + + static int + netdev_gre_init(void) + { + static int status = -1; + if (status < 0) { + ioctl_fd = open("/dev/net/dp0", O_RDONLY | O_NONBLOCK); + status = ioctl_fd >= 0 ? 0 : errno; + if (status) { + VLOG_ERR("failed to open ioctl fd: %s", strerror(status)); + } + } + return status; + } + + static int + do_ioctl(int cmd, void *arg) + { + return ioctl(ioctl_fd, cmd, arg) ? 
errno : 0; + } + + static int + parse_config(const char *name, const struct shash *args, + struct gre_port_config *config) + { + struct shash_node *node; + + memset(config, 0, sizeof *config); + + config->flags |= GRE_F_IN_CSUM; + config->flags |= GRE_F_OUT_CSUM; + config->flags |= GRE_F_PMTUD; + + SHASH_FOR_EACH (node, args) { + if (!strcmp(node->name, "remote_ip")) { + struct in_addr in_addr; + if (lookup_ip(node->data, &in_addr)) { + VLOG_WARN("%s: bad gre 'remote_ip'", name); + } else { + config->daddr = in_addr.s_addr; + } + } else if (!strcmp(node->name, "local_ip")) { + struct in_addr in_addr; + if (lookup_ip(node->data, &in_addr)) { + VLOG_WARN("%s: bad gre 'local_ip'", name); + } else { + config->saddr = in_addr.s_addr; + } + } else if (!strcmp(node->name, "key")) { + if (!strcmp(node->data, "flow")) { + config->flags |= GRE_F_IN_KEY_MATCH; + config->flags |= GRE_F_OUT_KEY_ACTION; + } else { + config->out_key = config->in_key = htonl(atoi(node->data)); + } + } else if (!strcmp(node->name, "in_key")) { + if (!strcmp(node->data, "flow")) { + config->flags |= GRE_F_IN_KEY_MATCH; + } else { + config->in_key = htonl(atoi(node->data)); + } + } else if (!strcmp(node->name, "out_key")) { + if (!strcmp(node->data, "flow")) { + config->flags |= GRE_F_OUT_KEY_ACTION; + } else { + config->out_key = htonl(atoi(node->data)); + } + } else if (!strcmp(node->name, "tos")) { + if (!strcmp(node->data, "inherit")) { + config->flags |= GRE_F_TOS_INHERIT; + } else { + config->tos = atoi(node->data); + } + } else if (!strcmp(node->name, "ttl")) { + if (!strcmp(node->data, "inherit")) { + config->flags |= GRE_F_TTL_INHERIT; + } else { + config->ttl = atoi(node->data); + } + } else if (!strcmp(node->name, "csum")) { + if (!strcmp(node->data, "false")) { + config->flags &= ~GRE_F_IN_CSUM; + config->flags &= ~GRE_F_OUT_CSUM; + } + } else if (!strcmp(node->name, "pmtud")) { + if (!strcmp(node->data, "false")) { + config->flags &= ~GRE_F_PMTUD; + } + } else { + VLOG_WARN("%s: unknown 
gre argument '%s'", name, node->name); + } + } + + if (!config->daddr) { + VLOG_WARN("%s: gre type requires valid 'remote_ip' argument", name); + return EINVAL; + } + + return 0; + } + + static int + netdev_gre_create(const char *name, const char *type OVS_UNUSED, + const struct shash *args, struct netdev_dev **netdev_devp) + { + int err; - struct odp_vport_add ova; ++ struct xflow_vport_add ova; + struct gre_port_config port_config; + struct netdev_dev_gre *netdev_dev; + + ovs_strlcpy(ova.port_type, "gre", sizeof ova.port_type); + ovs_strlcpy(ova.devname, name, sizeof ova.devname); + ova.config = &port_config; + + err = parse_config(name, args, &port_config); + if (err) { + return err; + } + - err = do_ioctl(ODP_VPORT_ADD, &ova); ++ err = do_ioctl(XFLOW_VPORT_ADD, &ova); + if (err == EEXIST) { + VLOG_WARN("%s: destroying existing device", name); + - err = do_ioctl(ODP_VPORT_DEL, ova.devname); ++ err = do_ioctl(XFLOW_VPORT_DEL, ova.devname); + if (err) { + return err; + } + - err = do_ioctl(ODP_VPORT_ADD, &ova); ++ err = do_ioctl(XFLOW_VPORT_ADD, &ova); + } + + if (err) { + return err; + } + + netdev_dev = xmalloc(sizeof *netdev_dev); + netdev_dev_init(&netdev_dev->netdev_dev, name, &netdev_gre_class); + + *netdev_devp = &netdev_dev->netdev_dev; + return 0; + } + + static int + netdev_gre_reconfigure(struct netdev_dev *netdev_dev_, const struct shash *args) + { + const char *name = netdev_dev_get_name(netdev_dev_); - struct odp_vport_mod ovm; ++ struct xflow_vport_mod ovm; + struct gre_port_config port_config; + int err; + + ovs_strlcpy(ovm.devname, name, sizeof ovm.devname); + ovm.config = &port_config; + + err = parse_config(name, args, &port_config); + if (err) { + return err; + } + - return do_ioctl(ODP_VPORT_MOD, &ovm); ++ return do_ioctl(XFLOW_VPORT_MOD, &ovm); + } + + static void + netdev_gre_destroy(struct netdev_dev *netdev_dev_) + { + struct netdev_dev_gre *netdev_dev = netdev_dev_gre_cast(netdev_dev_); + - do_ioctl(ODP_VPORT_DEL, (char 
*)netdev_dev_get_name(netdev_dev_)); ++ do_ioctl(XFLOW_VPORT_DEL, (char *)netdev_dev_get_name(netdev_dev_)); + free(netdev_dev); + } + + static int + netdev_gre_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED, + struct netdev **netdevp) + { + struct netdev_gre *netdev; + + netdev = xmalloc(sizeof *netdev); + netdev_init(&netdev->netdev, netdev_dev_); + + *netdevp = &netdev->netdev; + return 0; + } + + static void + netdev_gre_close(struct netdev *netdev_) + { + struct netdev_gre *netdev = netdev_gre_cast(netdev_); + free(netdev); + } + + static int + netdev_gre_set_etheraddr(struct netdev *netdev_, + const uint8_t mac[ETH_ADDR_LEN]) + { + struct netdev_gre *netdev = netdev_gre_cast(netdev_); - struct odp_vport_ether vport_ether; ++ struct xflow_vport_ether vport_ether; + int err; + + ovs_strlcpy(vport_ether.devname, netdev_get_name(netdev_), + sizeof vport_ether.devname); + + memcpy(vport_ether.ether_addr, mac, ETH_ADDR_LEN); + - err = ioctl(ioctl_fd, ODP_VPORT_ETHER_SET, &vport_ether); ++ err = ioctl(ioctl_fd, XFLOW_VPORT_ETHER_SET, &vport_ether); + if (err) { + return err; + } + + poll_notify(netdev); + return 0; + } + + static int + netdev_gre_get_etheraddr(const struct netdev *netdev_, + uint8_t mac[ETH_ADDR_LEN]) + { - struct odp_vport_ether vport_ether; ++ struct xflow_vport_ether vport_ether; + int err; + + ovs_strlcpy(vport_ether.devname, netdev_get_name(netdev_), + sizeof vport_ether.devname); + - err = ioctl(ioctl_fd, ODP_VPORT_ETHER_GET, &vport_ether); ++ err = ioctl(ioctl_fd, XFLOW_VPORT_ETHER_GET, &vport_ether); + if (err) { + return err; + } + + memcpy(mac, vport_ether.ether_addr, ETH_ADDR_LEN); + return 0; + } + + static int + netdev_gre_get_mtu(const struct netdev *netdev_, int *mtup) + { - struct odp_vport_mtu vport_mtu; ++ struct xflow_vport_mtu vport_mtu; + int err; + + ovs_strlcpy(vport_mtu.devname, netdev_get_name(netdev_), + sizeof vport_mtu.devname); + - err = ioctl(ioctl_fd, ODP_VPORT_MTU_GET, &vport_mtu); ++ err = 
ioctl(ioctl_fd, XFLOW_VPORT_MTU_GET, &vport_mtu); + if (err) { + return err; + } + + *mtup = vport_mtu.mtu; + return 0; + } + + static int + netdev_gre_get_carrier(const struct netdev *netdev OVS_UNUSED, bool *carrier) + { + *carrier = true; + return 0; + } + + static int + netdev_gre_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) + { + const char *name = netdev_get_name(netdev_); - struct odp_vport_stats_req ovsr; ++ struct xflow_vport_stats_req ovsr; + int err; + + ovs_strlcpy(ovsr.devname, name, sizeof ovsr.devname); - err = do_ioctl(ODP_VPORT_STATS_GET, &ovsr); ++ err = do_ioctl(XFLOW_VPORT_STATS_GET, &ovsr); + if (err) { + return err; + } + + stats->rx_packets = ovsr.stats.rx_packets; + stats->tx_packets = ovsr.stats.tx_packets; + stats->rx_bytes = ovsr.stats.rx_bytes; + stats->tx_bytes = ovsr.stats.tx_bytes; + stats->rx_errors = ovsr.stats.rx_errors; + stats->tx_errors = ovsr.stats.tx_errors; + stats->rx_dropped = ovsr.stats.rx_dropped; + stats->tx_dropped = ovsr.stats.tx_dropped; + stats->multicast = UINT64_MAX; + stats->collisions = ovsr.stats.collisions; + stats->rx_length_errors = UINT64_MAX; + stats->rx_over_errors = ovsr.stats.rx_over_err; + stats->rx_crc_errors = ovsr.stats.rx_crc_err; + stats->rx_frame_errors = ovsr.stats.rx_frame_err; + stats->rx_fifo_errors = UINT64_MAX; + stats->rx_missed_errors = UINT64_MAX; + stats->tx_aborted_errors = UINT64_MAX; + stats->tx_carrier_errors = UINT64_MAX; + stats->tx_fifo_errors = UINT64_MAX; + stats->tx_heartbeat_errors = UINT64_MAX; + stats->tx_window_errors = UINT64_MAX; + + return 0; + } + + static int + netdev_gre_update_flags(struct netdev *netdev OVS_UNUSED, + enum netdev_flags off, enum netdev_flags on OVS_UNUSED, + enum netdev_flags *old_flagsp) + { + if (off & (NETDEV_UP | NETDEV_PROMISC)) { + return EOPNOTSUPP; + } + + *old_flagsp = NETDEV_UP | NETDEV_PROMISC; + return 0; + } + + static int + netdev_gre_poll_add(struct netdev *netdev, void (*cb)(struct netdev_notifier *), + void 
*aux, struct netdev_notifier **notifierp) + { + const char *netdev_name = netdev_get_name(netdev); + struct netdev_gre_notifier *notifier; + struct list *list; + + list = shash_find_data(&netdev_gre_notifiers, netdev_name); + if (!list) { + list = xmalloc(sizeof *list); + list_init(list); + shash_add(&netdev_gre_notifiers, netdev_name, list); + } + + notifier = xmalloc(sizeof *notifier); + netdev_notifier_init(&notifier->notifier, netdev, cb, aux); + list_push_back(list, &notifier->node); + + *notifierp = &notifier->notifier; + return 0; + } + + static void + netdev_gre_poll_remove(struct netdev_notifier *notifier_) + { + struct netdev_gre_notifier *notifier = + CONTAINER_OF(notifier_, struct netdev_gre_notifier, notifier); + struct list *list; + + list = list_remove(&notifier->node); + if (list_is_empty(list)) { + const char *netdev_name = netdev_get_name(notifier_->netdev); + shash_delete(&netdev_gre_notifiers, + shash_find(&netdev_gre_notifiers, netdev_name)); + free(list); + } + free(notifier); + } + + static void + poll_notify(const struct netdev_gre *netdev) + { + struct list *list = shash_find_data(&netdev_gre_notifiers, + netdev_get_name(&netdev->netdev)); + + if (list) { + struct netdev_gre_notifier *notifier; + + LIST_FOR_EACH (notifier, struct netdev_gre_notifier, node, list) { + struct netdev_notifier *n = &notifier->notifier; + n->cb(n); + } + } + } + + const struct netdev_class netdev_gre_class = { + "gre", + + netdev_gre_init, + NULL, /* run */ + NULL, /* wait */ + + netdev_gre_create, + netdev_gre_destroy, + netdev_gre_reconfigure, + + netdev_gre_open, + netdev_gre_close, + + NULL, /* enumerate */ + + NULL, /* recv */ + NULL, /* recv_wait */ + NULL, /* drain */ + + NULL, /* send */ + NULL, /* send_wait */ + + netdev_gre_set_etheraddr, + netdev_gre_get_etheraddr, + netdev_gre_get_mtu, + NULL, /* get_ifindex */ + netdev_gre_get_carrier, + netdev_gre_get_stats, + NULL, /* set_stats */ + + NULL, /* get_features */ + NULL, /* set_advertisements */ + NULL, /* 
get_vlan_vid */ + NULL, /* set_policing */ + + NULL, /* get_in4 */ + NULL, /* set_in4 */ + NULL, /* get_in6 */ + NULL, /* add_router */ + NULL, /* get_next_hop */ + NULL, /* arp_lookup */ + + netdev_gre_update_flags, + + netdev_gre_poll_add, + netdev_gre_poll_remove, + }; diff --cc lib/ofp-print.c index bf038f43d,7c1ebd006..804b5aa5c --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@@ -134,8 -134,8 +134,8 @@@ ofp_packet_in(struct ds *string, const struct ofp_match match; packet.data = (void *) op->data; packet.size = data_len; - flow_extract(&packet, ntohs(op->in_port), &flow); - flow_to_match(&flow, &match); + flow_extract(&packet, 0, ntohs(op->in_port), &flow); - flow_to_match(&flow, 0, false, &match); ++ flow_to_match(&flow, false, &match); ofp_print_match(string, &match, verbosity); ds_put_char(string, '\n'); } diff --cc lib/vlog-modules.def index 8adb98561,5c836d6b8..1b80bc36f --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@@ -80,23 -82,21 +81,24 @@@ VLOG_MODULE(switchui VLOG_MODULE(terminal) VLOG_MODULE(timeval) VLOG_MODULE(tty) -VLOG_MODULE(socket_util) -VLOG_MODULE(switchui) VLOG_MODULE(unixctl) VLOG_MODULE(util) -VLOG_MODULE(vconn_tcp) +VLOG_MODULE(vconn) VLOG_MODULE(vconn_ssl) VLOG_MODULE(vconn_stream) +VLOG_MODULE(vconn_tcp) VLOG_MODULE(vconn_unix) -VLOG_MODULE(vconn) -VLOG_MODULE(vsctl) VLOG_MODULE(vlog) -VLOG_MODULE(wcelim) +VLOG_MODULE(vsctl) VLOG_MODULE(vswitchd) VLOG_MODULE(vt) +VLOG_MODULE(wcelim) +VLOG_MODULE(wdp) +VLOG_MODULE(wdp_xflow) VLOG_MODULE(xenserver) + VLOG_MODULE(xenserverd) +VLOG_MODULE(xfif) +VLOG_MODULE(xfif_linux) +VLOG_MODULE(xfif_netdev) #undef VLOG_MODULE diff --cc lib/xfif-linux.c index 8300f7006,b7c9e3e6c..feb58a8f6 --- a/lib/xfif-linux.c +++ b/lib/xfif-linux.c @@@ -30,22 -30,24 +30,24 @@@ #include #include -#include "dpif-provider.h" + #include "netdev.h" #include "ofpbuf.h" #include "poll-loop.h" #include "rtnetlink.h" + #include "shash.h" #include "svec.h" #include "util.h" +#include "xfif-provider.h" #include "vlog.h" 
-#define THIS_MODULE VLM_dpif_linux +#define THIS_MODULE VLM_xfif_linux /* Datapath interface for the openvswitch Linux kernel module. */ -struct dpif_linux { - struct dpif dpif; +struct xfif_linux { + struct xfif xfif; int fd; - /* Used by dpif_linux_get_all_names(). */ + /* Used by xfif_linux_get_all_names(). */ char *local_ifname; int minor; @@@ -167,14 -169,14 +169,14 @@@ xfif_linux_open(const char *name, cons } static void -dpif_linux_close(struct dpif *dpif_) +xfif_linux_close(struct xfif *xfif_) { - struct dpif_linux *dpif = dpif_linux_cast(dpif_); - rtnetlink_notifier_unregister(&dpif->port_notifier); - shash_destroy(&dpif->changed_ports); - free(dpif->local_ifname); - close(dpif->fd); - free(dpif); + struct xfif_linux *xfif = xfif_linux_cast(xfif_); + rtnetlink_notifier_unregister(&xfif->port_notifier); - svec_destroy(&xfif->changed_ports); ++ shash_destroy(&xfif->changed_ports); + free(xfif->local_ifname); + close(xfif->fd); + free(xfif); } static int @@@ -188,9 -190,31 +190,31 @@@ xfif_linux_get_all_names(const struct x } static int -dpif_linux_destroy(struct dpif *dpif_) +xfif_linux_destroy(struct xfif *xfif_) { - struct odp_port *ports; ++ struct xflow_port *ports; + size_t n_ports; + int err; + int i; + - err = dpif_port_list(dpif_, &ports, &n_ports); ++ err = xfif_port_list(xfif_, &ports, &n_ports); + if (err) { + return err; + } + + for (i = 0; i < n_ports; i++) { - if (ports[i].port != ODPP_LOCAL) { - err = do_ioctl(dpif_, ODP_VPORT_DEL, ports[i].devname); ++ if (ports[i].port != XFLOWP_LOCAL) { ++ err = do_ioctl(xfif_, XFLOW_VPORT_DEL, ports[i].devname); + if (err) { + VLOG_WARN_RL(&error_rl, "%s: error deleting port %s (%s)", - dpif_name(dpif_), ports[i].devname, strerror(err)); ++ xfif_name(xfif_), ports[i].devname, strerror(err)); + } + } + } + + free(ports); + - return do_ioctl(dpif_, ODP_DP_DESTROY, NULL); + return do_ioctl(xfif_, XFLOW_DP_DESTROY, NULL); } static int @@@ -230,7 -254,7 +254,7 @@@ xfif_linux_port_add(struct xfif *xfif_ 
memset(&port, 0, sizeof port); strncpy(port.devname, devname, sizeof port.devname); port.flags = flags; - error = do_ioctl(xfif_, XFLOW_PORT_ADD, &port); - error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port); ++ error = do_ioctl(xfif_, XFLOW_PORT_ATTACH, &port); if (!error) { *port_no = port.port; } @@@ -238,10 -262,30 +262,30 @@@ } static int -dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) +xfif_linux_port_del(struct xfif *xfif_, uint16_t port_no) { int tmp = port_no; - return do_ioctl(xfif_, XFLOW_PORT_DEL, &tmp); + int err; - struct odp_port port; ++ struct xflow_port port; + - err = dpif_port_query_by_number(dpif_, port_no, &port); ++ err = xfif_port_query_by_number(xfif_, port_no, &port); + if (err) { + return err; + } + - err = do_ioctl(dpif_, ODP_PORT_DETACH, &tmp); ++ err = do_ioctl(xfif_, XFLOW_PORT_DETACH, &tmp); + if (err) { + return err; + } + + if (!netdev_is_open(port.devname)) { + /* Try deleting the port if no one has it open. This shouldn't + * actually be necessary unless the config changed while we weren't + * running but it won't hurt anything if the port is already gone. 
*/ - do_ioctl(dpif_, ODP_VPORT_DEL, port.devname); ++ do_ioctl(xfif_, XFLOW_VPORT_DEL, port.devname); + } + + return 0; } static int @@@ -281,16 -325,18 +325,18 @@@ xfif_linux_port_list(const struct xfif } static int -dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) +xfif_linux_port_poll(const struct xfif *xfif_, char **devnamep) { - struct dpif_linux *dpif = dpif_linux_cast(dpif_); + struct xfif_linux *xfif = xfif_linux_cast(xfif_); - if (dpif->change_error) { - dpif->change_error = false; - shash_clear(&dpif->changed_ports); + if (xfif->change_error) { + xfif->change_error = false; - svec_clear(&xfif->changed_ports); ++ shash_clear(&xfif->changed_ports); return ENOBUFS; - } else if (xfif->changed_ports.n) { - *devnamep = xfif->changed_ports.names[--xfif->changed_ports.n]; - } else if (!shash_is_empty(&dpif->changed_ports)) { - struct shash_node *node = shash_first(&dpif->changed_ports); ++ } else if (!shash_is_empty(&xfif->changed_ports)) { ++ struct shash_node *node = shash_first(&xfif->changed_ports); + *devnamep = xstrdup(node->name); - shash_delete(&dpif->changed_ports, node); ++ shash_delete(&xfif->changed_ports, node); return 0; } else { return EAGAIN; @@@ -298,10 -344,10 +344,10 @@@ } static void -dpif_linux_port_poll_wait(const struct dpif *dpif_) +xfif_linux_port_poll_wait(const struct xfif *xfif_) { - struct dpif_linux *dpif = dpif_linux_cast(dpif_); - if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { + struct xfif_linux *xfif = xfif_linux_cast(xfif_); - if (xfif->changed_ports.n || xfif->change_error) { ++ if (!shash_is_empty(&xfif->changed_ports) || xfif->change_error) { poll_immediate_wake(); } else { rtnetlink_notifier_wait(); @@@ -417,8 -463,9 +463,9 @@@ xfif_linux_recv(struct xfif *xfif_, str int retval; int error; - buf = ofpbuf_new(65536); - buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING); - ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING); - retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); ++ buf = 
ofpbuf_new(65536 + XFIF_RECV_MSG_PADDING); ++ ofpbuf_reserve(buf, XFIF_RECV_MSG_PADDING); + retval = read(xfif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); if (retval < 0) { error = errno; if (error != EAGAIN) { @@@ -735,18 -782,18 +782,18 @@@ open_minor(int minor, struct xfif **xfi char *name; name = xasprintf("dp%d", minor); - dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); + xfif_init(&xfif->xfif, &xfif_linux_class, name, minor, minor); free(name); - dpif->fd = fd; - dpif->local_ifname = NULL; - dpif->minor = minor; - dpif->local_ifindex = 0; - shash_init(&dpif->changed_ports); - dpif->change_error = false; - *dpifp = &dpif->dpif; + xfif->fd = fd; + xfif->local_ifname = NULL; + xfif->minor = minor; + xfif->local_ifindex = 0; - svec_init(&xfif->changed_ports); ++ shash_init(&xfif->changed_ports); + xfif->change_error = false; + *xfifp = &xfif->xfif; } else { - free(dpif); + free(xfif); } } else { error = errno; @@@ -769,12 -816,9 +816,9 @@@ xfif_linux_port_changed(const struct rt { /* Our datapath changed, either adding a new port or deleting an * existing one. */ - if (!svec_contains(&xfif->changed_ports, change->ifname)) { - svec_add(&xfif->changed_ports, change->ifname); - svec_sort(&xfif->changed_ports); - } - shash_add_once(&dpif->changed_ports, change->ifname, NULL); ++ shash_add_once(&xfif->changed_ports, change->ifname, NULL); } } else { - dpif->change_error = true; + xfif->change_error = true; } } diff --cc lib/xfif-netdev.c index 475e7f550,000000000..57be768ee mode 100644,000000..100644 --- a/lib/xfif-netdev.c +++ b/lib/xfif-netdev.c @@@ -1,1380 -1,0 +1,1381 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "xfif.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "csum.h" +#include "flow.h" +#include "hmap.h" +#include "list.h" +#include "netdev.h" +#include "xflow-util.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "queue.h" +#include "timeval.h" +#include "util.h" +#include "xfif-provider.h" + +#include "vlog.h" +#define THIS_MODULE VLM_xfif_netdev + +/* Configuration parameters. */ +enum { N_QUEUES = 2 }; /* Number of queues for xfif_recv(). */ +enum { MAX_QUEUE_LEN = 100 }; /* Maximum number of packets per queue. */ +enum { N_GROUPS = 16 }; /* Number of port groups. */ +enum { MAX_PORTS = 256 }; /* Maximum number of ports. */ +enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ + +/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP + * headers to be aligned on a 4-byte boundary. */ +enum { XF_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; + +/* Datapath based on the network device interface from netdev.h. */ +struct xf_netdev { + struct list node; + int xf_idx; + int open_cnt; + bool destroyed; + + bool drop_frags; /* Drop all IP fragments, if true. */ + struct ovs_queue queues[N_QUEUES]; /* Messages queued for xfif_recv(). */ + struct hmap flow_table; /* Flow table. */ + struct xflow_port_group groups[N_GROUPS]; + + /* Statistics. */ + long long int n_frags; /* Number of dropped IP fragments. 
*/ + long long int n_hit; /* Number of flow table matches. */ + long long int n_missed; /* Number of flow table misses. */ + long long int n_lost; /* Number of misses not passed to client. */ + + /* Ports. */ + int n_ports; + struct xf_netdev_port *ports[MAX_PORTS]; + struct list port_list; + unsigned int serial; +}; + +/* A port in a netdev-based datapath. */ +struct xf_netdev_port { + int port_no; /* Index into xf_netdev's 'ports'. */ + struct list node; /* Element in xf_netdev's 'port_list'. */ + struct netdev *netdev; + bool internal; /* Internal port (as XFLOW_PORT_INTERNAL)? */ +}; + +/* A flow in xf_netdev's 'flow_table'. */ +struct xf_netdev_flow { + struct hmap_node node; /* Element in xf_netdev's 'flow_table'. */ + struct xflow_key key; + + /* Statistics. */ + struct timeval used; /* Last used time, in milliseconds. */ + long long int packet_count; /* Number of packets matched. */ + long long int byte_count; /* Number of bytes matched. */ + uint8_t ip_tos; /* IP TOS value. */ + uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ + + /* Actions. */ + union xflow_action *actions; + unsigned int n_actions; +}; + +/* Interface to netdev-based datapath. */ +struct xfif_netdev { + struct xfif xfif; + struct xf_netdev *xf; + int listen_mask; + unsigned int xf_serial; +}; + +/* All netdev-based datapaths. */ +static struct xf_netdev *xf_netdevs[256]; +struct list xf_netdev_list = LIST_INITIALIZER(&xf_netdev_list); +enum { N_XF_NETDEVS = ARRAY_SIZE(xf_netdevs) }; + +/* Maximum port MTU seen so far. 
*/ +static int max_mtu = ETH_PAYLOAD_MAX; + +static int get_port_by_number(struct xf_netdev *, uint16_t port_no, + struct xf_netdev_port **portp); +static int get_port_by_name(struct xf_netdev *, const char *devname, + struct xf_netdev_port **portp); +static void xf_netdev_free(struct xf_netdev *); +static void xf_netdev_flow_flush(struct xf_netdev *); +static int do_add_port(struct xf_netdev *, const char *devname, uint16_t flags, + uint16_t port_no); +static int do_del_port(struct xf_netdev *, uint16_t port_no); +static int xf_netdev_output_control(struct xf_netdev *, const struct ofpbuf *, + int queue_no, int port_no, uint32_t arg); +static int xf_netdev_execute_actions(struct xf_netdev *, + struct ofpbuf *, struct xflow_key *, + const union xflow_action *, int n); + +static struct xfif_netdev * +xfif_netdev_cast(const struct xfif *xfif) +{ + xfif_assert_class(xfif, &xfif_netdev_class); + return CONTAINER_OF(xfif, struct xfif_netdev, xfif); +} + +static struct xf_netdev * +get_xf_netdev(const struct xfif *xfif) +{ + return xfif_netdev_cast(xfif)->xf; +} + +static int +name_to_xf_idx(const char *name) +{ + if (!strncmp(name, "xf", 2) && isdigit((unsigned char)name[2])) { + int xf_idx = atoi(name + 2); + if (xf_idx >= 0 && xf_idx < N_XF_NETDEVS) { + return xf_idx; + } + } + return -1; +} + +static struct xf_netdev * +find_xf_netdev(const char *name) +{ + int xf_idx; + size_t i; + + xf_idx = name_to_xf_idx(name); + if (xf_idx >= 0) { + return xf_netdevs[xf_idx]; + } + + for (i = 0; i < N_XF_NETDEVS; i++) { + struct xf_netdev *xf = xf_netdevs[i]; + if (xf) { + struct xf_netdev_port *port; + if (!get_port_by_name(xf, name, &port)) { + return xf; + } + } + } + return NULL; +} + +static struct xfif * +create_xfif_netdev(struct xf_netdev *xf) +{ + struct xfif_netdev *xfif; + char *xfname; + + xf->open_cnt++; + + xfname = xasprintf("xf%d", xf->xf_idx); + xfif = xmalloc(sizeof *xfif); + xfif_init(&xfif->xfif, &xfif_netdev_class, xfname, xf->xf_idx, xf->xf_idx); + xfif->xf 
= xf; + xfif->listen_mask = 0; + xfif->xf_serial = xf->serial; + free(xfname); + + return &xfif->xfif; +} + +static int +create_xf_netdev(const char *name, int xf_idx, struct xfif **xfifp) +{ + struct xf_netdev *xf; + int error; + int i; + + if (xf_netdevs[xf_idx]) { + return EBUSY; + } + + /* Create datapath. */ + xf_netdevs[xf_idx] = xf = xzalloc(sizeof *xf); + list_push_back(&xf_netdev_list, &xf->node); + xf->xf_idx = xf_idx; + xf->open_cnt = 0; + xf->drop_frags = false; + for (i = 0; i < N_QUEUES; i++) { + queue_init(&xf->queues[i]); + } + hmap_init(&xf->flow_table); + for (i = 0; i < N_GROUPS; i++) { + xf->groups[i].ports = NULL; + xf->groups[i].n_ports = 0; + xf->groups[i].group = i; + } + list_init(&xf->port_list); + error = do_add_port(xf, name, XFLOW_PORT_INTERNAL, XFLOWP_LOCAL); + if (error) { + xf_netdev_free(xf); + return ENODEV; + } + + *xfifp = create_xfif_netdev(xf); + return 0; +} + +static int +xfif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create, + struct xfif **xfifp) +{ + if (create) { + if (find_xf_netdev(name)) { + return EEXIST; + } else { + int xf_idx = name_to_xf_idx(name); + if (xf_idx >= 0) { + return create_xf_netdev(name, xf_idx, xfifp); + } else { + /* Scan for unused xf_idx number. */ + for (xf_idx = 0; xf_idx < N_XF_NETDEVS; xf_idx++) { + int error = create_xf_netdev(name, xf_idx, xfifp); + if (error != EBUSY) { + return error; + } + } + + /* All datapath numbers in use. 
*/ + return ENOBUFS; + } + } + } else { + struct xf_netdev *xf = find_xf_netdev(name); + if (xf) { + *xfifp = create_xfif_netdev(xf); + return 0; + } else { + return ENODEV; + } + } +} + +static void +xf_netdev_free(struct xf_netdev *xf) +{ + int i; + + xf_netdev_flow_flush(xf); + while (xf->n_ports > 0) { + struct xf_netdev_port *port = CONTAINER_OF( + xf->port_list.next, struct xf_netdev_port, node); + do_del_port(xf, port->port_no); + } + for (i = 0; i < N_QUEUES; i++) { + queue_destroy(&xf->queues[i]); + } + hmap_destroy(&xf->flow_table); + for (i = 0; i < N_GROUPS; i++) { + free(xf->groups[i].ports); + } + xf_netdevs[xf->xf_idx] = NULL; + list_remove(&xf->node); + free(xf); +} + +static void +xfif_netdev_close(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + assert(xf->open_cnt > 0); + if (--xf->open_cnt == 0 && xf->destroyed) { + xf_netdev_free(xf); + } + free(xfif); +} + +static int +xfif_netdev_destroy(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + xf->destroyed = true; + return 0; +} + +static int +xfif_netdev_get_stats(const struct xfif *xfif, struct xflow_stats *stats) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + memset(stats, 0, sizeof *stats); + stats->n_flows = hmap_count(&xf->flow_table); + stats->cur_capacity = hmap_capacity(&xf->flow_table); + stats->max_capacity = MAX_FLOWS; + stats->n_ports = xf->n_ports; + stats->max_ports = MAX_PORTS; + stats->max_groups = N_GROUPS; + stats->n_frags = xf->n_frags; + stats->n_hit = xf->n_hit; + stats->n_missed = xf->n_missed; + stats->n_lost = xf->n_lost; + stats->max_miss_queue = MAX_QUEUE_LEN; + stats->max_action_queue = MAX_QUEUE_LEN; + return 0; +} + +static int +xfif_netdev_get_drop_frags(const struct xfif *xfif, bool *drop_fragsp) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + *drop_fragsp = xf->drop_frags; + return 0; +} + +static int +xfif_netdev_set_drop_frags(struct xfif *xfif, bool drop_frags) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + 
xf->drop_frags = drop_frags; + return 0; +} + +static int +do_add_port(struct xf_netdev *xf, const char *devname, uint16_t flags, + uint16_t port_no) +{ + bool internal = (flags & XFLOW_PORT_INTERNAL) != 0; + struct xf_netdev_port *port; + struct netdev_options netdev_options; + struct netdev *netdev; + int mtu; + int error; + + /* XXX reject devices already in some xf_netdev. */ + + /* Open and validate network device. */ + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; + netdev_options.may_create = true; + if (internal) { + netdev_options.type = "tap"; + } else { + netdev_options.may_open = true; + } + + error = netdev_open(&netdev_options, &netdev); + if (error) { + return error; + } + /* XXX reject loopback devices */ + /* XXX reject non-Ethernet devices */ + + error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false); + if (error) { + netdev_close(netdev); + return error; + } + + port = xmalloc(sizeof *port); + port->port_no = port_no; + port->netdev = netdev; + port->internal = internal; + + netdev_get_mtu(netdev, &mtu); + if (mtu > max_mtu) { + max_mtu = mtu; + } + + list_push_back(&xf->port_list, &port->node); + xf->ports[port_no] = port; + xf->n_ports++; + xf->serial++; + + return 0; +} + +static int +xfif_netdev_port_add(struct xfif *xfif, const char *devname, uint16_t flags, + uint16_t *port_nop) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + int port_no; + + for (port_no = 0; port_no < MAX_PORTS; port_no++) { + if (!xf->ports[port_no]) { + *port_nop = port_no; + return do_add_port(xf, devname, flags, port_no); + } + } + return EFBIG; +} + +static int +xfif_netdev_port_del(struct xfif *xfif, uint16_t port_no) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + return port_no == XFLOWP_LOCAL ? 
EINVAL : do_del_port(xf, port_no); +} + +static bool +is_valid_port_number(uint16_t port_no) +{ + return port_no < MAX_PORTS; +} + +static int +get_port_by_number(struct xf_netdev *xf, + uint16_t port_no, struct xf_netdev_port **portp) +{ + if (!is_valid_port_number(port_no)) { + *portp = NULL; + return EINVAL; + } else { + *portp = xf->ports[port_no]; + return *portp ? 0 : ENOENT; + } +} + +static int +get_port_by_name(struct xf_netdev *xf, + const char *devname, struct xf_netdev_port **portp) +{ + struct xf_netdev_port *port; + + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + if (!strcmp(netdev_get_name(port->netdev), devname)) { + *portp = port; + return 0; + } + } + return ENOENT; +} + +static int +do_del_port(struct xf_netdev *xf, uint16_t port_no) +{ + struct xf_netdev_port *port; + char *name; + int error; + + error = get_port_by_number(xf, port_no, &port); + if (error) { + return error; + } + + list_remove(&port->node); + xf->ports[port->port_no] = NULL; + xf->n_ports--; + xf->serial++; + + name = xstrdup(netdev_get_name(port->netdev)); + netdev_close(port->netdev); + + free(name); + free(port); + + return 0; +} + +static void +answer_port_query(const struct xf_netdev_port *port, struct xflow_port *xflow_port) +{ + memset(xflow_port, 0, sizeof *xflow_port); + ovs_strlcpy(xflow_port->devname, netdev_get_name(port->netdev), + sizeof xflow_port->devname); + xflow_port->port = port->port_no; + xflow_port->flags = port->internal ? 
XFLOW_PORT_INTERNAL : 0; +} + +static int +xfif_netdev_port_query_by_number(const struct xfif *xfif, uint16_t port_no, + struct xflow_port *xflow_port) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int error; + + error = get_port_by_number(xf, port_no, &port); + if (!error) { + answer_port_query(port, xflow_port); + } + return error; +} + +static int +xfif_netdev_port_query_by_name(const struct xfif *xfif, const char *devname, + struct xflow_port *xflow_port) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int error; + + error = get_port_by_name(xf, devname, &port); + if (!error) { + answer_port_query(port, xflow_port); + } + return error; +} + +static void +xf_netdev_free_flow(struct xf_netdev *xf, struct xf_netdev_flow *flow) +{ + hmap_remove(&xf->flow_table, &flow->node); + free(flow->actions); + free(flow); +} + +static void +xf_netdev_flow_flush(struct xf_netdev *xf) +{ + struct xf_netdev_flow *flow, *next; + + HMAP_FOR_EACH_SAFE (flow, next, struct xf_netdev_flow, node, + &xf->flow_table) { + xf_netdev_free_flow(xf, flow); + } +} + +static int +xfif_netdev_flow_flush(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + xf_netdev_flow_flush(xf); + return 0; +} + +static int +xfif_netdev_port_list(const struct xfif *xfif, struct xflow_port *ports, int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int i; + + i = 0; + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + struct xflow_port *xflow_port = &ports[i]; + if (i >= n) { + break; + } + answer_port_query(port, xflow_port); + i++; + } + return xf->n_ports; +} + +static int +xfif_netdev_port_poll(const struct xfif *xfif_, char **devnamep OVS_UNUSED) +{ + struct xfif_netdev *xfif = xfif_netdev_cast(xfif_); + if (xfif->xf_serial != xfif->xf->serial) { + xfif->xf_serial = xfif->xf->serial; + return ENOBUFS; + } else { + return EAGAIN; + } +} + +static void 
+xfif_netdev_port_poll_wait(const struct xfif *xfif_) +{ + struct xfif_netdev *xfif = xfif_netdev_cast(xfif_); + if (xfif->xf_serial != xfif->xf->serial) { + poll_immediate_wake(); + } +} + +static int +get_port_group(const struct xfif *xfif, int group_no, + struct xflow_port_group **groupp) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + + if (group_no >= 0 && group_no < N_GROUPS) { + *groupp = &xf->groups[group_no]; + return 0; + } else { + *groupp = NULL; + return EINVAL; + } +} + +static int +xfif_netdev_port_group_get(const struct xfif *xfif, int group_no, + uint16_t ports[], int n) +{ + struct xflow_port_group *group; + int error; + + if (n < 0) { + return -EINVAL; + } + + error = get_port_group(xfif, group_no, &group); + if (!error) { + memcpy(ports, group->ports, MIN(n, group->n_ports) * sizeof *ports); + return group->n_ports; + } else { + return -error; + } +} + +static int +xfif_netdev_port_group_set(struct xfif *xfif, int group_no, + const uint16_t ports[], int n) +{ + struct xflow_port_group *group; + int error; + + if (n < 0 || n > MAX_PORTS) { + return EINVAL; + } + + error = get_port_group(xfif, group_no, &group); + if (!error) { + free(group->ports); + group->ports = xmemdup(ports, n * sizeof *group->ports); + group->n_ports = n; + group->group = group_no; + } + return error; +} + +static struct xf_netdev_flow * +xf_netdev_lookup_flow(const struct xf_netdev *xf, + const struct xflow_key *key) +{ + struct xf_netdev_flow *flow; + + HMAP_FOR_EACH_WITH_HASH (flow, struct xf_netdev_flow, node, + xflow_key_hash(key, 0), &xf->flow_table) { + if (xflow_key_equal(&flow->key, key)) { + return flow; + } + } + return NULL; +} + +static void +answer_flow_query(struct xf_netdev_flow *flow, uint32_t query_flags, + struct xflow_flow *xflow_flow) +{ + if (flow) { + xflow_flow->key = flow->key; + xflow_flow->stats.n_packets = flow->packet_count; + xflow_flow->stats.n_bytes = flow->byte_count; + xflow_flow->stats.used_sec = flow->used.tv_sec; + 
xflow_flow->stats.used_nsec = flow->used.tv_usec * 1000; + xflow_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl); + xflow_flow->stats.ip_tos = flow->ip_tos; + xflow_flow->stats.error = 0; + if (xflow_flow->n_actions > 0) { + unsigned int n = MIN(xflow_flow->n_actions, flow->n_actions); + memcpy(xflow_flow->actions, flow->actions, + n * sizeof *xflow_flow->actions); + xflow_flow->n_actions = flow->n_actions; + } + + if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) { + flow->tcp_ctl = 0; + } + + } else { + xflow_flow->stats.error = ENOENT; + } +} + +static int +xfif_netdev_flow_get(const struct xfif *xfif, struct xflow_flow flows[], int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + int i; + + for (i = 0; i < n; i++) { + struct xflow_flow *xflow_flow = &flows[i]; + answer_flow_query(xf_netdev_lookup_flow(xf, &xflow_flow->key), + xflow_flow->flags, xflow_flow); + } + return 0; +} + +static int +xfif_netdev_validate_actions(const union xflow_action *actions, int n_actions, + bool *mutates) +{ + unsigned int i; + + *mutates = false; + for (i = 0; i < n_actions; i++) { + const union xflow_action *a = &actions[i]; + switch (a->type) { + case XFLOWAT_OUTPUT: + if (a->output.port >= MAX_PORTS) { + return EINVAL; + } + break; + + case XFLOWAT_OUTPUT_GROUP: + *mutates = true; + if (a->output_group.group >= N_GROUPS) { + return EINVAL; + } + break; + + case XFLOWAT_CONTROLLER: + break; + + case XFLOWAT_SET_DL_TCI: + *mutates = true; + if (a->dl_tci.mask != htons(VLAN_VID_MASK) + && a->dl_tci.mask != htons(VLAN_PCP_MASK) + && a->dl_tci.mask != htons(VLAN_VID_MASK | VLAN_PCP_MASK)) { + return EINVAL; + } + if (a->dl_tci.tci & ~a->dl_tci.mask){ + return EINVAL; + } + break; + + case XFLOWAT_SET_NW_TOS: + *mutates = true; + if (a->nw_tos.nw_tos & IP_ECN_MASK) { + return EINVAL; + } + break; + + case XFLOWAT_STRIP_VLAN: + case XFLOWAT_SET_DL_SRC: + case XFLOWAT_SET_DL_DST: + case XFLOWAT_SET_NW_SRC: + case XFLOWAT_SET_NW_DST: + case XFLOWAT_SET_TP_SRC: + case XFLOWAT_SET_TP_DST: + 
*mutates = true; + break; + + default: + return EOPNOTSUPP; + } + } + return 0; +} + +static int +set_flow_actions(struct xf_netdev_flow *flow, struct xflow_flow *xflow_flow) +{ + size_t n_bytes; + bool mutates; + int error; + + if (xflow_flow->n_actions >= 4096 / sizeof *xflow_flow->actions) { + return EINVAL; + } + error = xfif_netdev_validate_actions(xflow_flow->actions, + xflow_flow->n_actions, &mutates); + if (error) { + return error; + } + + n_bytes = xflow_flow->n_actions * sizeof *flow->actions; + flow->actions = xrealloc(flow->actions, n_bytes); + flow->n_actions = xflow_flow->n_actions; + memcpy(flow->actions, xflow_flow->actions, n_bytes); + return 0; +} + +static int +add_flow(struct xfif *xfif, struct xflow_flow *xflow_flow) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + int error; + + flow = xzalloc(sizeof *flow); + flow->key = xflow_flow->key; + + error = set_flow_actions(flow, xflow_flow); + if (error) { + free(flow); + return error; + } + + hmap_insert(&xf->flow_table, &flow->node, + xflow_key_hash(&flow->key, 0)); + return 0; +} + +static void +clear_stats(struct xf_netdev_flow *flow) +{ + flow->used.tv_sec = 0; + flow->used.tv_usec = 0; + flow->packet_count = 0; + flow->byte_count = 0; + flow->ip_tos = 0; + flow->tcp_ctl = 0; +} + +static int +xfif_netdev_flow_put(struct xfif *xfif, struct xflow_flow_put *put) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + + flow = xf_netdev_lookup_flow(xf, &put->flow.key); + if (!flow) { + if (put->flags & XFLOWPF_CREATE) { + if (hmap_count(&xf->flow_table) < MAX_FLOWS) { + return add_flow(xfif, &put->flow); + } else { + return EFBIG; + } + } else { + return ENOENT; + } + } else { + if (put->flags & XFLOWPF_MODIFY) { + int error = set_flow_actions(flow, &put->flow); + if (!error && put->flags & XFLOWPF_ZERO_STATS) { + clear_stats(flow); + } + return error; + } else { + return EEXIST; + } + } +} + + +static int +xfif_netdev_flow_del(struct xfif 
*xfif, struct xflow_flow *xflow_flow) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + + flow = xf_netdev_lookup_flow(xf, &xflow_flow->key); + if (flow) { + answer_flow_query(flow, 0, xflow_flow); + xf_netdev_free_flow(xf, flow); + return 0; + } else { + return ENOENT; + } +} + +static int +xfif_netdev_flow_list(const struct xfif *xfif, struct xflow_flow flows[], int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + int i; + + i = 0; + HMAP_FOR_EACH (flow, struct xf_netdev_flow, node, &xf->flow_table) { + if (i >= n) { + break; + } + answer_flow_query(flow, 0, &flows[i++]); + } + return hmap_count(&xf->flow_table); +} + +static int +xfif_netdev_execute(struct xfif *xfif, uint16_t in_port, + const union xflow_action actions[], int n_actions, + const struct ofpbuf *packet) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct ofpbuf copy; + bool mutates; + struct xflow_key key; + flow_t flow; + int error; + + if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) { + return EINVAL; + } + + error = xfif_netdev_validate_actions(actions, n_actions, &mutates); + if (error) { + return error; + } + + if (mutates) { + /* We need a deep copy of 'packet' since we're going to modify its + * data. */ + ofpbuf_init(©, XF_NETDEV_HEADROOM + packet->size); + copy.data = (char*)copy.base + XF_NETDEV_HEADROOM; + ofpbuf_put(©, packet->data, packet->size); + } else { + /* We still need a shallow copy of 'packet', even though we won't + * modify its data, because flow_extract() modifies packet->l2, etc. + * We could probably get away with modifying those but it's more polite + * if we don't. 
*/ + copy = *packet; + } - flow_extract(©, in_port, &flow); ++ flow_extract(©, 0, in_port, &flow); + xflow_key_from_flow(&key, &flow); + error = xf_netdev_execute_actions(xf, ©, &key, actions, n_actions); + if (mutates) { + ofpbuf_uninit(©); + } + return error; +} + +static int +xfif_netdev_recv_get_mask(const struct xfif *xfif, int *listen_mask) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + *listen_mask = xfif_netdev->listen_mask; + return 0; +} + +static int +xfif_netdev_recv_set_mask(struct xfif *xfif, int listen_mask) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + if (!(listen_mask & ~XFLOWL_ALL)) { + xfif_netdev->listen_mask = listen_mask; + return 0; + } else { + return EINVAL; + } +} + +static struct ovs_queue * +find_nonempty_queue(struct xfif *xfif) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + struct xf_netdev *xf = get_xf_netdev(xfif); + int mask = xfif_netdev->listen_mask; + int i; + + for (i = 0; i < N_QUEUES; i++) { + struct ovs_queue *q = &xf->queues[i]; + if (q->n && mask & (1u << i)) { + return q; + } + } + return NULL; +} + +static int +xfif_netdev_recv(struct xfif *xfif, struct ofpbuf **bufp) +{ + struct ovs_queue *q = find_nonempty_queue(xfif); + if (q) { + *bufp = queue_pop_head(q); + return 0; + } else { + return EAGAIN; + } +} + +static void +xfif_netdev_recv_wait(struct xfif *xfif) +{ + struct ovs_queue *q = find_nonempty_queue(xfif); + if (q) { + poll_immediate_wake(); + } else { + /* No messages ready to be received, and xf_wait() will ensure that we + * wake up to queue new messages, so there is nothing to do. 
*/ + } +} + +static void +xf_netdev_flow_used(struct xf_netdev_flow *flow, + const struct xflow_key *key, + const struct ofpbuf *packet) +{ + time_timeval(&flow->used); + flow->packet_count++; + flow->byte_count += packet->size; + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + flow->ip_tos = nh->ip_tos; + + if (key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + flow->tcp_ctl |= th->tcp_ctl; + } + } +} + +static void +xf_netdev_port_input(struct xf_netdev *xf, struct xf_netdev_port *port, + struct ofpbuf *packet) +{ + struct xf_netdev_flow *flow; + struct xflow_key key; + flow_t f; + - if (flow_extract(packet, port->port_no, &f) && xf->drop_frags) { ++ if (flow_extract(packet, 0, port->port_no, &f) && xf->drop_frags) { + xf->n_frags++; + return; + } + xflow_key_from_flow(&key, &f); + + flow = xf_netdev_lookup_flow(xf, &key); + if (flow) { + xf_netdev_flow_used(flow, &key, packet); + xf_netdev_execute_actions(xf, packet, &key, + flow->actions, flow->n_actions); + xf->n_hit++; + } else { + xf->n_missed++; + xf_netdev_output_control(xf, packet, _XFLOWL_MISS_NR, port->port_no, 0); + } +} + +static void +xf_netdev_run(void) +{ + struct ofpbuf packet; + struct xf_netdev *xf; + + ofpbuf_init(&packet, XF_NETDEV_HEADROOM + max_mtu); + LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) { + struct xf_netdev_port *port; + + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + int error; + + /* Reset packet contents. 
*/ + packet.data = (char*)packet.base + XF_NETDEV_HEADROOM; + packet.size = 0; + + error = netdev_recv(port->netdev, &packet); + if (!error) { + xf_netdev_port_input(xf, port, &packet); + } else if (error != EAGAIN) { + struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "error receiving data from %s: %s", + netdev_get_name(port->netdev), strerror(error)); + } + } + } + ofpbuf_uninit(&packet); +} + +static void +xf_netdev_wait(void) +{ + struct xf_netdev *xf; + + LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) { + struct xf_netdev_port *port; + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + netdev_recv_wait(port->netdev); + } + } +} + + +/* Modify or add a 802.1Q header in 'packet' according to 'a'. */ +static void +xf_netdev_set_dl_tci(struct ofpbuf *packet, struct xflow_key *key, + const struct xflow_action_dl_tci *a) +{ + struct vlan_eth_header *veh; + + if (key->dl_tci) { + veh = packet->l2; + veh->veth_tci = (veh->veth_tci & ~a->mask) | a->tci; + } else { + /* Insert new 802.1Q header. 
*/ + struct eth_header *eh = packet->l2; + struct vlan_eth_header tmp; + memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); + memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); + tmp.veth_type = htons(ETH_TYPE_VLAN); + /* 'a->tci' is already in network byte order (the existing-tag branch of this function ORs it straight into veth_tci), so it must not be byte-swapped again here. */ + tmp.veth_tci = a->tci; + tmp.veth_next_type = eh->eth_type; + + veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); + memcpy(veh, &tmp, sizeof tmp); + packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN; + } + + key->dl_tci = veh->veth_tci | htons(XFLOW_TCI_PRESENT); +} + +static void +xf_netdev_strip_vlan(struct ofpbuf *packet, struct xflow_key *key) +{ + struct vlan_eth_header *veh = packet->l2; + if (veh->veth_type == htons(ETH_TYPE_VLAN)) { + struct eth_header tmp; + + memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); + memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); + tmp.eth_type = veh->veth_next_type; + + packet->size -= VLAN_HEADER_LEN; + packet->data = (char*)packet->data + VLAN_HEADER_LEN; + packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; + memcpy(packet->data, &tmp, sizeof tmp); + + key->dl_tci = htons(0); + } +} + +static void +xf_netdev_set_dl_src(struct ofpbuf *packet, struct xflow_key *key, + const uint8_t dl_addr[ETH_ADDR_LEN]) +{ + struct eth_header *eh = packet->l2; + memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src); + memcpy(key->dl_src, dl_addr, sizeof key->dl_src); +} + +static void +xf_netdev_set_dl_dst(struct ofpbuf *packet, struct xflow_key *key, + const uint8_t dl_addr[ETH_ADDR_LEN]) +{ + struct eth_header *eh = packet->l2; + memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst); + memcpy(key->dl_dst, dl_addr, sizeof key->dl_dst); +} + +static void +xf_netdev_set_nw_addr(struct ofpbuf *packet, struct xflow_key *key, + const struct xflow_action_nw_addr *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint32_t *field; + + field = a->type == XFLOWAT_SET_NW_SRC ?
&nh->ip_src : &nh->ip_dst; + if (key->nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = packet->l4; + th->tcp_csum = recalc_csum32(th->tcp_csum, *field, a->nw_addr); + } else if (key->nw_proto == IP_TYPE_UDP) { + struct udp_header *uh = packet->l4; + if (uh->udp_csum) { + uh->udp_csum = recalc_csum32(uh->udp_csum, *field, a->nw_addr); + if (!uh->udp_csum) { + uh->udp_csum = 0xffff; + } + } + } + nh->ip_csum = recalc_csum32(nh->ip_csum, *field, a->nw_addr); + *field = a->nw_addr; + + /* Mirror the rewritten address (not the action type code) into the flow key. */ + if (a->type == XFLOWAT_SET_NW_SRC) { + key->nw_src = a->nw_addr; + } else { + key->nw_dst = a->nw_addr; + } + } +} + +static void +xf_netdev_set_nw_tos(struct ofpbuf *packet, struct xflow_key *key, + const struct xflow_action_nw_tos *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint8_t *field = &nh->ip_tos; + + /* Set the DSCP bits and preserve the ECN bits. */ + uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK); + + /* The checksum must account for the byte actually stored ('new', which keeps the ECN bits), not just the requested DSCP value. */ + nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field), + htons((uint16_t)new)); + *field = new; + key->nw_tos = a->nw_tos; + } +} + +static void +xf_netdev_set_tp_port(struct ofpbuf *packet, struct xflow_key *key, + const struct xflow_action_tp_port *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + uint16_t *field; + if (key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + field = a->type == XFLOWAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst; + th->tcp_csum = recalc_csum16(th->tcp_csum, *field, a->tp_port); + *field = a->tp_port; + } else if (key->nw_proto == IPPROTO_UDP) { + struct udp_header *uh = packet->l4; + field = a->type == XFLOWAT_SET_TP_SRC ?
&uh->udp_src : &uh->udp_dst; + uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port); + *field = a->tp_port; + } else { + return; + } + + if (a->type == XFLOWAT_SET_TP_SRC) { + key->tp_src = a->tp_port; + } else { + key->tp_dst = a->tp_port; + } + } +} + +static void +xf_netdev_output_port(struct xf_netdev *xf, struct ofpbuf *packet, + uint16_t out_port) +{ + struct xf_netdev_port *p = xf->ports[out_port]; + if (p) { + netdev_send(p->netdev, packet); + } +} + +static void +xf_netdev_output_group(struct xf_netdev *xf, uint16_t group, uint16_t in_port, + struct ofpbuf *packet) +{ + struct xflow_port_group *g = &xf->groups[group]; + int i; + + for (i = 0; i < g->n_ports; i++) { + uint16_t out_port = g->ports[i]; + if (out_port != in_port) { + xf_netdev_output_port(xf, packet, out_port); + } + } +} + +static int +xf_netdev_output_control(struct xf_netdev *xf, const struct ofpbuf *packet, + int queue_no, int port_no, uint32_t arg) +{ + struct ovs_queue *q = &xf->queues[queue_no]; + struct xflow_msg *header; + struct ofpbuf *msg; + size_t msg_size; + + if (q->n >= MAX_QUEUE_LEN) { + xf->n_lost++; + return ENOBUFS; + } + + msg_size = sizeof *header + packet->size; - msg = ofpbuf_new(msg_size); ++ msg = ofpbuf_new(msg_size + XFIF_RECV_MSG_PADDING); ++ /* Reserve the headroom while the buffer is still empty, so that the xflow_msg header lands at msg->data with XFIF_RECV_MSG_PADDING bytes free in front of it. */ ++ ofpbuf_reserve(msg, XFIF_RECV_MSG_PADDING); + header = ofpbuf_put_uninit(msg, sizeof *header); + header->type = queue_no; + header->length = msg_size; + header->port = port_no; + header->arg = arg; + ofpbuf_put(msg, packet->data, packet->size); + queue_push_tail(q, msg); + + return 0; +} + +static int +xf_netdev_execute_actions(struct xf_netdev *xf, + struct ofpbuf *packet, struct xflow_key *key, + const union xflow_action *actions, int n_actions) +{ + int i; + for (i = 0; i < n_actions; i++) { + const union xflow_action *a = &actions[i]; + + switch (a->type) { + case XFLOWAT_OUTPUT: + xf_netdev_output_port(xf, packet, a->output.port); + break; + + case XFLOWAT_OUTPUT_GROUP: + xf_netdev_output_group(xf,
a->output_group.group, key->in_port, + packet); + break; + + case XFLOWAT_CONTROLLER: + xf_netdev_output_control(xf, packet, _XFLOWL_ACTION_NR, + key->in_port, a->controller.arg); + break; + + case XFLOWAT_SET_DL_TCI: + xf_netdev_set_dl_tci(packet, key, &a->dl_tci); + break; + + case XFLOWAT_STRIP_VLAN: + xf_netdev_strip_vlan(packet, key); + break; + + case XFLOWAT_SET_DL_SRC: + xf_netdev_set_dl_src(packet, key, a->dl_addr.dl_addr); + break; + + case XFLOWAT_SET_DL_DST: + xf_netdev_set_dl_dst(packet, key, a->dl_addr.dl_addr); + break; + + case XFLOWAT_SET_NW_SRC: + case XFLOWAT_SET_NW_DST: + xf_netdev_set_nw_addr(packet, key, &a->nw_addr); + break; + + case XFLOWAT_SET_NW_TOS: + xf_netdev_set_nw_tos(packet, key, &a->nw_tos); + break; + + case XFLOWAT_SET_TP_SRC: + case XFLOWAT_SET_TP_DST: + xf_netdev_set_tp_port(packet, key, &a->tp_port); + break; + } + } + return 0; +} + +const struct xfif_class xfif_netdev_class = { + "netdev", + xf_netdev_run, + xf_netdev_wait, + NULL, /* enumerate */ + xfif_netdev_open, + xfif_netdev_close, + NULL, /* get_all_names */ + xfif_netdev_destroy, + xfif_netdev_get_stats, + xfif_netdev_get_drop_frags, + xfif_netdev_set_drop_frags, + xfif_netdev_port_add, + xfif_netdev_port_del, + xfif_netdev_port_query_by_number, + xfif_netdev_port_query_by_name, + xfif_netdev_port_list, + xfif_netdev_port_poll, + xfif_netdev_port_poll_wait, + xfif_netdev_port_group_get, + xfif_netdev_port_group_set, + xfif_netdev_flow_get, + xfif_netdev_flow_put, + xfif_netdev_flow_del, + xfif_netdev_flow_flush, + xfif_netdev_flow_list, + xfif_netdev_execute, + xfif_netdev_recv_get_mask, + xfif_netdev_recv_set_mask, + NULL, /* get_sflow_probability */ + NULL, /* set_sflow_probability */ + xfif_netdev_recv, + xfif_netdev_recv_wait, +}; diff --cc lib/xfif-provider.h index 31de0aca1,33663ff47..3b427972f --- a/lib/xfif-provider.h +++ b/lib/xfif-provider.h @@@ -21,7 -21,9 +21,9 @@@ * datapath. 
*/ #include + #include "openflow/openflow.h" -#include "dpif.h" +#include "xfif.h" + #include "util.h" #ifdef __cplusplus extern "C" { @@@ -298,24 -298,34 +300,34 @@@ struct xfif_class * 'probability' is expressed as the number of packets out of UINT_MAX to * sample, e.g. probability/UINT_MAX is the probability of sampling a given * packet. */ - int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability); + int (*set_sflow_probability)(struct xfif *xfif, uint32_t probability); - /* Attempts to receive a message from 'dpif'. If successful, stores the + /* Attempts to receive a message from 'xfif'. If successful, stores the * message into '*packetp'. The message, if one is received, must begin - * with 'struct xflow_msg' as a header. Only messages of the types - * selected with the recv_set_mask member function should be received. - * with 'struct odp_msg' as a header, and must have at least - * DPIF_RECV_MSG_PADDING bytes of headroom (allocated using ++ * with 'struct xflow_msg' as a header, and must have at least ++ * XFIF_RECV_MSG_PADDING bytes of headroom (allocated using + * e.g. ofpbuf_reserve()). Only messages of the types selected with the + * set_listen_mask member function should be received. * * This function must not block. If no message is ready to be received * when it is called, it should return EAGAIN without blocking. */ - int (*recv)(struct dpif *dpif, struct ofpbuf **packetp); + int (*recv)(struct xfif *xfif, struct ofpbuf **packetp); - /* Arranges for the poll loop to wake up when 'dpif' has a message queued + /* Arranges for the poll loop to wake up when 'xfif' has a message queued * to be received with the recv member function. */ - void (*recv_wait)(struct dpif *dpif); + void (*recv_wait)(struct xfif *xfif); }; + /* Minimum number of bytes of headroom for a packet returned by the 'recv' - * member function (see above). This headroom allows "struct odp_msg" to be ++ * member function (see above). 
This headroom allows "struct xflow_msg" to be + * replaced by "struct ofp_packet_in" without copying the buffer. */ -#define DPIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \ - - sizeof(struct odp_msg)) -BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct odp_msg)); -BUILD_ASSERT_DECL(DPIF_RECV_MSG_PADDING % 4 == 0); ++#define XFIF_RECV_MSG_PADDING (sizeof(struct ofp_packet_in) \ ++ - sizeof(struct xflow_msg)) ++BUILD_ASSERT_DECL(sizeof(struct ofp_packet_in) > sizeof(struct xflow_msg)); ++BUILD_ASSERT_DECL(XFIF_RECV_MSG_PADDING % 4 == 0); + -extern const struct dpif_class dpif_linux_class; -extern const struct dpif_class dpif_netdev_class; +extern const struct xfif_class xfif_linux_class; +extern const struct xfif_class xfif_netdev_class; #ifdef __cplusplus } diff --cc lib/xfif.c index bb7a19544,097b38d8c..1f18f4ca7 --- a/lib/xfif.c +++ b/lib/xfif.c @@@ -57,20 -57,20 +57,20 @@@ static struct shash xfif_classes = SHAS * we really need to see them. */ static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); -/* Not really much point in logging many dpif errors. */ +/* Not really much point in logging many xfif errors. 
*/ - static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); + static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); -static void log_operation(const struct dpif *, const char *operation, +static void log_operation(const struct xfif *, const char *operation, int error); -static void log_flow_operation(const struct dpif *, const char *operation, - int error, struct odp_flow *flow); -static void log_flow_put(struct dpif *, int error, - const struct odp_flow_put *); +static void log_flow_operation(const struct xfif *, const char *operation, + int error, struct xflow_flow *flow); +static void log_flow_put(struct xfif *, int error, + const struct xflow_flow_put *); static bool should_log_flow_message(int error); -static void check_rw_odp_flow(struct odp_flow *); +static void check_rw_xflow_flow(struct xflow_flow *); static void -dp_initialize(void) +xf_initialize(void) { static int status = -1; @@@ -724,8 -724,13 +724,13 @@@ xfif_flow_get(const struct xfif *xfif, if (!error) { error = flow->stats.error; } + if (error) { + /* Make the results predictable on error. */ + memset(&flow->stats, 0, sizeof flow->stats); + flow->n_actions = 0; + } if (should_log_flow_message(error)) { - log_flow_operation(dpif, "flow_get", error, flow); + log_flow_operation(xfif, "flow_get", error, flow); } return error; } @@@ -1008,22 -1013,25 +1013,25 @@@ xfif_set_sflow_probability(struct xfif return error; } -/* Attempts to receive a message from 'dpif'. If successful, stores the +/* Attempts to receive a message from 'xfif'. If successful, stores the * message into '*packetp'. The message, if one is received, will begin with - * 'struct xflow_msg' as a header. Only messages of the types selected with - * 'struct odp_msg' as a header, and will have at least DPIF_RECV_MSG_PADDING ++ * 'struct xflow_msg' as a header, and will have at least XFIF_RECV_MSG_PADDING + * bytes of headroom. 
Only messages of the types selected with - * dpif_set_listen_mask() will ordinarily be received (but if a message type is + * xfif_set_listen_mask() will ordinarily be received (but if a message type is * enabled and then later disabled, some stragglers might pop up). * * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no message is immediately available. */ int -dpif_recv(struct dpif *dpif, struct ofpbuf **packetp) +xfif_recv(struct xfif *xfif, struct ofpbuf **packetp) { - int error = dpif->dpif_class->recv(dpif, packetp); + int error = xfif->xfif_class->recv(xfif, packetp); if (!error) { + struct ofpbuf *buf = *packetp; + - assert(ofpbuf_headroom(buf) >= DPIF_RECV_MSG_PADDING); ++ assert(ofpbuf_headroom(buf) >= XFIF_RECV_MSG_PADDING); if (VLOG_IS_DBG_ENABLED()) { - struct ofpbuf *buf = *packetp; - struct odp_msg *msg = buf->data; + struct xflow_msg *msg = buf->data; void *payload = msg + 1; size_t payload_len = buf->size - sizeof *msg; char *s = ofp_packet_to_string(payload, payload_len, payload_len); diff --cc lib/xflow-util.c index 09524330b,000000000..b6d2d13ec mode 100644,000000..100644 --- a/lib/xflow-util.c +++ b/lib/xflow-util.c @@@ -1,198 -1,0 +1,204 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "xflow-util.h" +#include +#include +#include +#include "coverage.h" +#include "dynamic-string.h" +#include "flow.h" +#include "packets.h" +#include "timeval.h" +#include "util.h" + +union xflow_action * +xflow_actions_add(struct xflow_actions *actions, uint16_t type) +{ + union xflow_action *a; + if (actions->n_actions < MAX_XFLOW_ACTIONS) { + a = &actions->actions[actions->n_actions++]; + } else { + COVERAGE_INC(xflow_overflow); + actions->n_actions = MAX_XFLOW_ACTIONS + 1; + a = &actions->actions[MAX_XFLOW_ACTIONS - 1]; + } + memset(a, 0, sizeof *a); + a->type = type; + return a; +} + +void +format_xflow_key(struct ds *ds, const struct xflow_key *key) +{ - ds_put_format(ds, "in_port%04x", key->in_port); ++ ds_put_format(ds, "tunnel%"PRIx32":in_port%04x", ++ key->tun_id, key->in_port); + if (key->dl_tci) { + ds_put_format(ds, ":vlan%"PRIu16":pcp%d", + vlan_tci_to_vid(key->dl_tci), + vlan_tci_to_pcp(key->dl_tci)); + } + ds_put_format(ds, " mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" type%04x " + "proto%"PRId8" tos%"PRIu8" ip"IP_FMT"->"IP_FMT" port%d->%d", + ETH_ADDR_ARGS(key->dl_src), ETH_ADDR_ARGS(key->dl_dst), + ntohs(key->dl_type), key->nw_proto, key->nw_tos, + IP_ARGS(&key->nw_src), IP_ARGS(&key->nw_dst), + ntohs(key->tp_src), ntohs(key->tp_dst)); +} + +void +format_xflow_action(struct ds *ds, const union xflow_action *a) +{ + switch (a->type) { + case XFLOWAT_OUTPUT: + ds_put_format(ds, "%"PRIu16, a->output.port); + break; + case XFLOWAT_OUTPUT_GROUP: + ds_put_format(ds, "g%"PRIu16, a->output_group.group); + break; + case XFLOWAT_CONTROLLER: + ds_put_format(ds, "ctl(%"PRIu32")", a->controller.arg); + break; ++ case XFLOWAT_SET_TUNNEL: ++ ds_put_format(ds, "set_tunnel(0x%08"PRIx32")", ntohl(a->tunnel.tun_id)); ++ break; + case XFLOWAT_SET_DL_TCI: + ds_put_format(ds, "set_tci(%04"PRIx16",mask=%04"PRIx16")", + ntohs(a->dl_tci.tci), ntohs(a->dl_tci.mask)); + break; + case XFLOWAT_STRIP_VLAN: + ds_put_format(ds, "strip_vlan"); + break; + case 
XFLOWAT_SET_DL_SRC: + ds_put_format(ds, "set_dl_src("ETH_ADDR_FMT")", + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case XFLOWAT_SET_DL_DST: + ds_put_format(ds, "set_dl_dst("ETH_ADDR_FMT")", + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case XFLOWAT_SET_NW_SRC: + ds_put_format(ds, "set_nw_src("IP_FMT")", + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case XFLOWAT_SET_NW_DST: + ds_put_format(ds, "set_nw_dst("IP_FMT")", + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case XFLOWAT_SET_NW_TOS: + ds_put_format(ds, "set_nw_tos(%"PRIu8")", a->nw_tos.nw_tos); + break; + case XFLOWAT_SET_TP_SRC: + ds_put_format(ds, "set_tp_src(%"PRIu16")", ntohs(a->tp_port.tp_port)); + break; + case XFLOWAT_SET_TP_DST: + ds_put_format(ds, "set_tp_dst(%"PRIu16")", ntohs(a->tp_port.tp_port)); + break; + default: - ds_put_format(ds, "***bad action %"PRIu16"***", a->type); ++ ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type); + break; + } +} + +void +format_xflow_actions(struct ds *ds, const union xflow_action *actions, + size_t n_actions) +{ + size_t i; + for (i = 0; i < n_actions; i++) { + if (i) { + ds_put_char(ds, ','); + } + format_xflow_action(ds, &actions[i]); + } + if (!n_actions) { + ds_put_cstr(ds, "drop"); + } +} + +void +format_xflow_flow_stats(struct ds *ds, const struct xflow_flow_stats *s) +{ + ds_put_format(ds, "packets:%llu, bytes:%llu, used:", + (unsigned long long int) s->n_packets, + (unsigned long long int) s->n_bytes); + if (s->used_sec) { + long long int used = s->used_sec * 1000 + s->used_nsec / 1000000; + ds_put_format(ds, "%.3fs", (time_msec() - used) / 1000.0); + } else { + ds_put_format(ds, "never"); + } +} + +void +format_xflow_flow(struct ds *ds, const struct xflow_flow *f) +{ + format_xflow_key(ds, &f->key); + ds_put_cstr(ds, ", "); + format_xflow_flow_stats(ds, &f->stats); + ds_put_cstr(ds, ", actions:"); + format_xflow_actions(ds, f->actions, f->n_actions); +} + +void +xflow_key_from_flow(struct xflow_key *key, const struct flow *flow) +{ ++ key->tun_id 
= flow->tun_id; + key->nw_src = flow->nw_src; - key->nw_dst = ofp_port_to_xflow_port(flow->nw_dst); - key->in_port = flow->in_port; ++ key->nw_dst = flow->nw_dst; ++ key->in_port = ofp_port_to_xflow_port(flow->in_port); + if (flow->dl_vlan == htons(OFP_VLAN_NONE)) { + key->dl_tci = htons(0); + } else { + uint16_t vid = flow->dl_vlan & htons(VLAN_VID_MASK); + uint16_t pcp = htons((flow->dl_vlan_pcp << VLAN_PCP_SHIFT) + & VLAN_PCP_MASK); + key->dl_tci = vid | pcp | htons(XFLOW_TCI_PRESENT); + } + key->dl_type = flow->dl_type; + key->tp_src = flow->tp_src; + key->tp_dst = flow->tp_dst; + memcpy(key->dl_src, flow->dl_src, ETH_ALEN); + memcpy(key->dl_dst, flow->dl_dst, ETH_ALEN); + key->nw_proto = flow->nw_proto; + key->nw_tos = flow->nw_tos; +} + +void +xflow_key_to_flow(const struct xflow_key *key, struct flow *flow) +{ + flow->wildcards = 0; + flow->priority = 0xffff; ++ flow->tun_id = key->tun_id; + flow->nw_src = key->nw_src; + flow->nw_dst = key->nw_dst; + flow->in_port = xflow_port_to_ofp_port(key->in_port); + if (key->dl_tci) { + flow->dl_vlan = htons(vlan_tci_to_vid(key->dl_tci)); + flow->dl_vlan_pcp = vlan_tci_to_pcp(key->dl_tci); + } else { + flow->dl_vlan = htons(OFP_VLAN_NONE); + flow->dl_vlan_pcp = 0; + } + flow->dl_type = key->dl_type; + flow->tp_src = key->tp_src; + flow->tp_dst = key->tp_dst; + memcpy(flow->dl_src, key->dl_src, ETH_ALEN); + memcpy(flow->dl_dst, key->dl_dst, ETH_ALEN); + flow->nw_proto = key->nw_proto; + flow->nw_tos = key->nw_tos; +} diff --cc ofproto/discovery.h index 737b045f9,2288ff60f..63960e9e1 --- a/ofproto/discovery.h +++ b/ofproto/discovery.h @@@ -22,14 -22,16 +22,16 @@@ struct discovery; struct settings; struct switch_status; +struct wdp; int discovery_create(const char *accept_controller_re, bool update_resolv_conf, - struct dpif *, struct switch_status *, + struct wdp *, struct switch_status *, struct discovery **); void discovery_destroy(struct discovery *); + bool discovery_get_update_resolv_conf(const struct discovery *); 
void discovery_set_update_resolv_conf(struct discovery *, bool update_resolv_conf); + const char *discovery_get_accept_controller_re(const struct discovery *); int discovery_set_accept_controller_re(struct discovery *, const char *re); void discovery_question_connectivity(struct discovery *); bool discovery_run(struct discovery *, char **controller_name); diff --cc ofproto/fail-open.c index b6016b42a,a79c5b226..d19893be1 --- a/ofproto/fail-open.c +++ b/ofproto/fail-open.c @@@ -172,9 -263,7 +262,9 @@@ fail_open_recover(struct fail_open *fo fo->next_bogus_packet_in = LLONG_MAX; memset(&flow, 0, sizeof flow); - flow.wildcards = OFPFW_ALL; - ofproto_delete_flow(fo->ofproto, &flow, OVSFW_ALL, FAIL_OPEN_PRIORITY); ++ flow.wildcards = OVSFW_ALL; + flow.priority = FAIL_OPEN_PRIORITY; + ofproto_delete_flow(fo->ofproto, &flow); } } @@@ -202,9 -291,8 +292,9 @@@ fail_open_flushed(struct fail_open *fo action.output.len = htons(sizeof action); action.output.port = htons(OFPP_NORMAL); memset(&flow, 0, sizeof flow); - flow.wildcards = OFPFW_ALL; - ofproto_add_flow(fo->ofproto, &flow, OVSFW_ALL, FAIL_OPEN_PRIORITY, - &action, 1, 0); ++ flow.wildcards = OVSFW_ALL; + flow.priority = FAIL_OPEN_PRIORITY; + ofproto_add_flow(fo->ofproto, &flow, &action, 1, 0); } } diff --cc ofproto/in-band.c index b485e5aca,bf90273e1..076c71a5f --- a/ofproto/in-band.c +++ b/ofproto/in-band.c @@@ -23,23 -23,17 +23,16 @@@ #include #include #include "dhcp.h" -#include "dpif.h" #include "flow.h" - #include "mac-learning.h" #include "netdev.h" - #include "xflow-util.h" - #include "ofp-print.h" -#include "odp-util.h" #include "ofproto.h" #include "ofpbuf.h" #include "openflow/openflow.h" - #include "openvswitch/xflow.h" #include "packets.h" #include "poll-loop.h" - #include "rconn.h" #include "status.h" #include "timeval.h" - #include "vconn.h" +#include "wdp.h" - #include "xfif.h" #define THIS_MODULE VLM_in_band #include "vlog.h" @@@ -194,48 -198,64 +197,62 @@@ * gateway. 
*/ - #define IB_BASE_PRIORITY 18181800 - + /* Priorities used in classifier for in-band rules. These values are higher + * than any that may be set with OpenFlow, and "18" kind of looks like "IB". + * The ordering of priorities is not important because all of the rules set up + * by in-band control have the same action. The only reason to use more than + * one priority is to make the kind of flow easier to see during debugging. */ enum { - IBR_FROM_LOCAL_DHCP, /* (a) From local port, DHCP. */ + /* One set per bridge. */ + IBR_FROM_LOCAL_DHCP = 180000, /* (a) From local port, DHCP. */ IBR_TO_LOCAL_ARP, /* (b) To local port, ARP. */ IBR_FROM_LOCAL_ARP, /* (c) From local port, ARP. */ - IBR_TO_REMOTE_ARP, /* (d) To remote MAC, ARP. */ - IBR_FROM_REMOTE_ARP, /* (e) From remote MAC, ARP. */ - IBR_TO_CTL_ARP, /* (f) To controller IP, ARP. */ - IBR_FROM_CTL_ARP, /* (g) From controller IP, ARP. */ - IBR_TO_CTL_OFP, /* (h) To controller, OpenFlow port. */ - IBR_FROM_CTL_OFP, /* (i) From controller, OpenFlow port. */ - #if OFP_TCP_PORT != OFP_SSL_PORT - #error Need to support separate TCP and SSL flows. - #endif - N_IB_RULES + + /* One set per unique next-hop MAC. */ + IBR_TO_NEXT_HOP_ARP, /* (d) To remote MAC, ARP. */ + IBR_FROM_NEXT_HOP_ARP, /* (e) From remote MAC, ARP. */ + + /* One set per unique remote IP address. */ + IBR_TO_REMOTE_ARP, /* (f) To remote IP, ARP. */ + IBR_FROM_REMOTE_ARP, /* (g) From remote IP, ARP. */ + + /* One set per unique remote (IP,port) pair. */ + IBR_TO_REMOTE_TCP, /* (h) To remote IP, TCP port. */ + IBR_FROM_REMOTE_TCP /* (i) From remote IP, TCP port. */ }; - struct ib_rule { - bool installed; + struct in_band_rule { flow_t flow; - uint32_t wildcards; - unsigned int priority; }; + /* Track one remote IP and next hop information. */ + struct in_band_remote { + struct sockaddr_in remote_addr; /* IP address, in network byte order. */ + uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. 
*/ + uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */ + struct netdev *remote_netdev; /* Device to send to next-hop MAC. */ + }; + struct in_band { struct ofproto *ofproto; - struct rconn *controller; struct status_category *ss_cat; - /* Keep track of local port's information. */ - uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ - struct netdev *local_netdev; /* Local port's network device. */ - time_t next_local_refresh; - - /* Keep track of controller and next hop's information. */ - uint32_t controller_ip; /* Controller IP, 0 if unknown. */ - uint8_t remote_mac[ETH_ADDR_LEN]; /* Remote MAC. */ - struct netdev *remote_netdev; - uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous remote MAC. */ - time_t next_remote_refresh; - - /* Rules that we set up. */ - struct ib_rule rules[N_IB_RULES]; + /* Remote information. */ + time_t next_remote_refresh; /* Refresh timer. */ + struct in_band_remote *remotes; + size_t n_remotes; + + /* Local information. */ + time_t next_local_refresh; /* Refresh timer. */ + uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ + struct netdev *local_netdev; /* Local port's network device. */ + + /* Local and remote addresses that are installed as flows. */ + uint8_t installed_local_mac[ETH_ADDR_LEN]; + struct sockaddr_in *remote_addrs; + size_t n_remote_addrs; + uint8_t *remote_macs; + size_t n_remote_macs; }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); @@@ -343,40 -392,314 +389,247 @@@ in_band_status_cb(struct status_reply * } } -/* Returns true if 'packet' should be sent to the local port regardless - * of the flow table. */ -bool -in_band_msg_in_hook(struct in_band *in_band, const flow_t *flow, - const struct ofpbuf *packet) -{ - if (!in_band) { - return false; - } - - /* Regardless of how the flow table is configured, we want to be - * able to see replies to our DHCP requests. 
*/ - if (flow->dl_type == htons(ETH_TYPE_IP) - && flow->nw_proto == IP_TYPE_UDP - && flow->tp_src == htons(DHCP_SERVER_PORT) - && flow->tp_dst == htons(DHCP_CLIENT_PORT) - && packet->l7) { - struct dhcp_header *dhcp; - - dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data, - sizeof *dhcp); - if (!dhcp) { - return false; - } - - refresh_local(in_band); - if (!eth_addr_is_zero(in_band->local_mac) - && eth_addr_equals(dhcp->chaddr, in_band->local_mac)) { - return true; - } - } - - return false; -} - -/* Returns true if the rule that would match 'flow' with 'actions' is - * allowed to be set up in the datapath. */ -bool -in_band_rule_check(struct in_band *in_band, const flow_t *flow, - const struct odp_actions *actions) -{ - if (!in_band) { - return true; - } - - /* Don't allow flows that would prevent DHCP replies from being seen - * by the local port. */ - if (flow->dl_type == htons(ETH_TYPE_IP) - && flow->nw_proto == IP_TYPE_UDP - && flow->tp_src == htons(DHCP_SERVER_PORT) - && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { - int i; - - for (i=0; in_actions; i++) { - if (actions->actions[i].output.type == ODPAT_OUTPUT - && actions->actions[i].output.port == ODPP_LOCAL) { - return true; - } - } - return false; - } - - return true; -} - static void - drop_flow(struct in_band *in_band, int rule_idx) + init_rule(struct in_band_rule *rule, unsigned int priority) + { - rule->wildcards = OVSFW_ALL; - rule->priority = priority; - - /* Not strictly necessary but seems cleaner. */ ++ /* Clearing the flow is not strictly necessary but it seems cleaner. 
*/ + memset(&rule->flow, 0, sizeof rule->flow); ++ ++ rule->flow.wildcards = OVSFW_ALL; ++ rule->flow.priority = priority; + } + + static void -set_in_port(struct in_band_rule *rule, uint16_t odp_port) ++set_in_port(struct in_band_rule *rule, uint16_t ofp_port) { - struct ib_rule *rule = &in_band->rules[rule_idx]; - rule->wildcards &= ~OFPFW_IN_PORT; - rule->flow.in_port = odp_port; ++ rule->flow.wildcards &= ~OFPFW_IN_PORT; ++ rule->flow.in_port = ofp_port; + } - if (rule->installed) { - rule->installed = false; - ofproto_delete_flow(in_band->ofproto, &rule->flow); + static void + set_dl_type(struct in_band_rule *rule, uint16_t dl_type) + { - rule->wildcards &= ~OFPFW_DL_TYPE; ++ rule->flow.wildcards &= ~OFPFW_DL_TYPE; + rule->flow.dl_type = dl_type; + } + + static void + set_dl_src(struct in_band_rule *rule, const uint8_t dl_src[ETH_ADDR_LEN]) + { - rule->wildcards &= ~OFPFW_DL_SRC; ++ rule->flow.wildcards &= ~OFPFW_DL_SRC; + memcpy(rule->flow.dl_src, dl_src, ETH_ADDR_LEN); + } + + static void + set_dl_dst(struct in_band_rule *rule, const uint8_t dl_dst[ETH_ADDR_LEN]) + { - rule->wildcards &= ~OFPFW_DL_DST; ++ rule->flow.wildcards &= ~OFPFW_DL_DST; + memcpy(rule->flow.dl_dst, dl_dst, ETH_ADDR_LEN); + } + + static void + set_tp_src(struct in_band_rule *rule, uint16_t tp_src) + { - rule->wildcards &= ~OFPFW_TP_SRC; ++ rule->flow.wildcards &= ~OFPFW_TP_SRC; + rule->flow.tp_src = tp_src; + } + + static void + set_tp_dst(struct in_band_rule *rule, uint16_t tp_dst) + { - rule->wildcards &= ~OFPFW_TP_DST; ++ rule->flow.wildcards &= ~OFPFW_TP_DST; + rule->flow.tp_dst = tp_dst; + } + + static void + set_nw_proto(struct in_band_rule *rule, uint8_t nw_proto) + { - rule->wildcards &= ~OFPFW_NW_PROTO; ++ rule->flow.wildcards &= ~OFPFW_NW_PROTO; + rule->flow.nw_proto = nw_proto; + } + + static void + set_nw_src(struct in_band_rule *rule, const struct in_addr nw_src) + { - rule->wildcards &= ~OFPFW_NW_SRC_MASK; ++ rule->flow.wildcards &= ~OFPFW_NW_SRC_MASK; + rule->flow.nw_src 
= nw_src.s_addr; + } + + static void + set_nw_dst(struct in_band_rule *rule, const struct in_addr nw_dst) + { - rule->wildcards &= ~OFPFW_NW_DST_MASK; ++ rule->flow.wildcards &= ~OFPFW_NW_DST_MASK; + rule->flow.nw_dst = nw_dst.s_addr; + } + + static void + make_rules(struct in_band *ib, + void (*cb)(struct in_band *, const struct in_band_rule *)) + { + struct in_band_rule rule; + size_t i; + + if (!eth_addr_is_zero(ib->installed_local_mac)) { + /* (a) Allow DHCP requests sent from the local port. */ + init_rule(&rule, IBR_FROM_LOCAL_DHCP); - set_in_port(&rule, ODPP_LOCAL); ++ set_in_port(&rule, OFPP_LOCAL); + set_dl_type(&rule, htons(ETH_TYPE_IP)); + set_dl_src(&rule, ib->installed_local_mac); + set_nw_proto(&rule, IP_TYPE_UDP); + set_tp_src(&rule, htons(DHCP_CLIENT_PORT)); + set_tp_dst(&rule, htons(DHCP_SERVER_PORT)); + cb(ib, &rule); + + /* (b) Allow ARP replies to the local port's MAC address. */ + init_rule(&rule, IBR_TO_LOCAL_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_dl_dst(&rule, ib->installed_local_mac); + set_nw_proto(&rule, ARP_OP_REPLY); + cb(ib, &rule); + + /* (c) Allow ARP requests from the local port's MAC address. */ + init_rule(&rule, IBR_FROM_LOCAL_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_dl_src(&rule, ib->installed_local_mac); + set_nw_proto(&rule, ARP_OP_REQUEST); + cb(ib, &rule); + } + + for (i = 0; i < ib->n_remote_macs; i++) { + const uint8_t *remote_mac = &ib->remote_macs[i * ETH_ADDR_LEN]; + + if (i > 0) { + const uint8_t *prev_mac = &ib->remote_macs[(i - 1) * ETH_ADDR_LEN]; + if (eth_addr_equals(remote_mac, prev_mac)) { + /* Skip duplicates. */ + continue; + } + } + + /* (d) Allow ARP replies to the next hop's MAC address. */ + init_rule(&rule, IBR_TO_NEXT_HOP_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_dl_dst(&rule, remote_mac); + set_nw_proto(&rule, ARP_OP_REPLY); + cb(ib, &rule); + + /* (e) Allow ARP requests from the next hop's MAC address. 
*/ + init_rule(&rule, IBR_FROM_NEXT_HOP_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_dl_src(&rule, remote_mac); + set_nw_proto(&rule, ARP_OP_REQUEST); + cb(ib, &rule); + } + + for (i = 0; i < ib->n_remote_addrs; i++) { + const struct sockaddr_in *a = &ib->remote_addrs[i]; + + if (!i || a->sin_addr.s_addr != a[-1].sin_addr.s_addr) { + /* (f) Allow ARP replies containing the remote's IP address as a + * target. */ + init_rule(&rule, IBR_TO_REMOTE_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_nw_proto(&rule, ARP_OP_REPLY); + set_nw_dst(&rule, a->sin_addr); + cb(ib, &rule); + + /* (g) Allow ARP requests containing the remote's IP address as a + * source. */ + init_rule(&rule, IBR_FROM_REMOTE_ARP); + set_dl_type(&rule, htons(ETH_TYPE_ARP)); + set_nw_proto(&rule, ARP_OP_REQUEST); + set_nw_src(&rule, a->sin_addr); + cb(ib, &rule); + } + + if (!i + || a->sin_addr.s_addr != a[-1].sin_addr.s_addr + || a->sin_port != a[-1].sin_port) { + /* (h) Allow TCP traffic to the remote's IP and port. */ + init_rule(&rule, IBR_TO_REMOTE_TCP); + set_dl_type(&rule, htons(ETH_TYPE_IP)); + set_nw_proto(&rule, IP_TYPE_TCP); + set_nw_dst(&rule, a->sin_addr); + set_tp_dst(&rule, a->sin_port); + cb(ib, &rule); + + /* (i) Allow TCP traffic from the remote's IP and port. */ + init_rule(&rule, IBR_FROM_REMOTE_TCP); + set_dl_type(&rule, htons(ETH_TYPE_IP)); + set_nw_proto(&rule, IP_TYPE_TCP); + set_nw_src(&rule, a->sin_addr); + set_tp_src(&rule, a->sin_port); + cb(ib, &rule); + } } } - /* out_port and fixed_fields are assumed never to change. 
*/ static void - set_up_flow(struct in_band *in_band, int rule_idx, const flow_t *flow, - uint32_t fixed_fields, uint16_t out_port) + drop_rule(struct in_band *ib, const struct in_band_rule *rule) { - struct ib_rule *rule = &in_band->rules[rule_idx]; - ofproto_delete_flow(ib->ofproto, &rule->flow, - rule->wildcards, rule->priority); ++ ofproto_delete_flow(ib->ofproto, &rule->flow); + } - if (!rule->installed || memcmp(flow, &rule->flow, sizeof *flow)) { - union ofp_action action; + /* Drops from the flow table all of the flows set up by 'ib', then clears out + * the information about the installed flows so that they can be filled in + * again if necessary. */ + static void + drop_rules(struct in_band *ib) + { + /* Drop rules. */ + make_rules(ib, drop_rule); + + /* Clear out state. */ + memset(ib->installed_local_mac, 0, sizeof ib->installed_local_mac); + + free(ib->remote_addrs); + ib->remote_addrs = NULL; + ib->n_remote_addrs = 0; + + free(ib->remote_macs); + ib->remote_macs = NULL; + ib->n_remote_macs = 0; + } + + static void + add_rule(struct in_band *ib, const struct in_band_rule *rule) + { + union ofp_action action; - drop_flow(in_band, rule_idx); + action.type = htons(OFPAT_OUTPUT); + action.output.len = htons(sizeof action); + action.output.port = htons(OFPP_NORMAL); + action.output.max_len = htons(0); - ofproto_add_flow(ib->ofproto, &rule->flow, rule->wildcards, - rule->priority, &action, 1, 0); ++ ofproto_add_flow(ib->ofproto, &rule->flow, &action, 1, 0); + } - rule->installed = true; - rule->flow = *flow; - rule->flow.wildcards = OFPFW_ALL & ~fixed_fields; - rule->flow.priority = IB_BASE_PRIORITY + (N_IB_RULES - rule_idx); + /* Inserts flows into the flow table for the current state of 'ib'. 
*/ + static void + add_rules(struct in_band *ib) + { + make_rules(ib, add_rule); + } - action.type = htons(OFPAT_OUTPUT); - action.output.len = htons(sizeof action); - action.output.port = htons(out_port); - action.output.max_len = htons(0); - ofproto_add_flow(in_band->ofproto, &rule->flow, &action, 1, 0); + static int + compare_addrs(const void *a_, const void *b_) + { + const struct sockaddr_in *a = a_; + const struct sockaddr_in *b = b_; + int cmp; + + cmp = memcmp(&a->sin_addr.s_addr, + &b->sin_addr.s_addr, + sizeof a->sin_addr.s_addr); + if (cmp) { + return cmp; } + return memcmp(&a->sin_port, &b->sin_port, sizeof a->sin_port); + } + + static int + compare_macs(const void *a, const void *b) + { + return memcmp(a, b, ETH_ADDR_LEN); } void @@@ -532,16 -764,16 +694,15 @@@ in_band_flushed(struct in_band *in_band } int -in_band_create(struct ofproto *ofproto, struct dpif *dpif, +in_band_create(struct ofproto *ofproto, struct wdp *wdp, - struct switch_status *ss, struct rconn *controller, - struct in_band **in_bandp) + struct switch_status *ss, struct in_band **in_bandp) { struct in_band *in_band; - char *local_name; - char local_name[IF_NAMESIZE]; struct netdev *local_netdev; ++ char *local_name; int error; - error = dpif_port_get_name(dpif, ODPP_LOCAL, - local_name, sizeof local_name); + error = wdp_port_get_name(wdp, OFPP_LOCAL, &local_name); if (error) { VLOG_ERR("failed to initialize in-band control: cannot get name " "of datapath local port (%s)", strerror(error)); diff --cc ofproto/in-band.h index 75ab0be7e,5122e4c00..55724669b --- a/ofproto/in-band.h +++ b/ofproto/in-band.h @@@ -25,13 -26,21 +25,22 @@@ struct ofproto struct rconn; struct settings; struct switch_status; +struct wdp; -int in_band_create(struct ofproto *, struct dpif *, struct switch_status *, +int in_band_create(struct ofproto *, struct wdp *, struct switch_status *, - struct rconn *controller, struct in_band **); + struct in_band **); void in_band_destroy(struct in_band *); + + void 
in_band_set_remotes(struct in_band *, + const struct sockaddr_in *, size_t n); + void in_band_run(struct in_band *); void in_band_wait(struct in_band *); + + bool in_band_msg_in_hook(struct in_band *, const flow_t *, + const struct ofpbuf *packet); + bool in_band_rule_check(struct in_band *, const flow_t *, - const struct odp_actions *); ++ const struct xflow_actions *); void in_band_flushed(struct in_band *); #endif /* in-band.h */ diff --cc ofproto/ofproto-sflow.c index eb70df461,60baf0e9e..8cedfe95b --- a/ofproto/ofproto-sflow.c +++ b/ofproto/ofproto-sflow.c @@@ -253,13 -248,8 +250,8 @@@ ofproto_sflow_clear(struct ofproto_sflo ofproto_sflow_options_destroy(os->options); os->options = NULL; - PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) { - ofproto_sflow_del_port(os, xflow_port); - } - port_array_clear(&os->ports); - /* Turn off sampling to save CPU cycles. */ - dpif_set_sflow_probability(os->dpif, 0); + wdp_set_sflow_probability(os->wdp, 0); } bool @@@ -284,7 -274,13 +276,13 @@@ voi ofproto_sflow_destroy(struct ofproto_sflow *os) { if (os) { + struct ofproto_sflow_port *osp; - unsigned int odp_port; ++ unsigned int xflow_port; + ofproto_sflow_clear(os); - PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) { - ofproto_sflow_del_port(os, odp_port); ++ PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) { ++ ofproto_sflow_del_port(os, xflow_port); + } port_array_destroy(&os->ports); free(os); } @@@ -336,14 -331,15 +333,15 @@@ ofproto_sflow_add_port(struct ofproto_s osp->netdev = netdev; ifindex = netdev_get_ifindex(netdev); if (ifindex <= 0) { - ifindex = (os->sflow_agent->subId << 16) + odp_port; + ifindex = (os->sflow_agent->subId << 16) + xflow_port; } SFL_DS_SET(osp->dsi, 0, ifindex, 0); - port_array_set(&os->ports, odp_port, osp); + port_array_set(&os->ports, xflow_port, osp); - /* Add poller. */ + /* Add poller and sampler. 
*/ if (os->sflow_agent) { - ofproto_sflow_add_poller(os, osp, odp_port); + ofproto_sflow_add_poller(os, osp, xflow_port); + ofproto_sflow_add_sampler(os, osp); } } @@@ -436,13 -432,13 +434,13 @@@ ofproto_sflow_set_options(struct ofprot sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff); /* Set the sampling_rate down in the datapath. */ - dpif_set_sflow_probability(os->dpif, - MAX(1, UINT32_MAX / options->sampling_rate)); + wdp_set_sflow_probability(os->wdp, + MAX(1, UINT32_MAX / options->sampling_rate)); /* Add samplers and pollers for the currently known ports. */ - PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) { - ofproto_sflow_add_poller(os, osp, odp_port); + PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) { - ofproto_sflow_add_sampler(os, osp, - options->sampling_rate, options->header_len); ++ ofproto_sflow_add_poller(os, osp, xflow_port); + ofproto_sflow_add_sampler(os, osp); } } @@@ -493,12 -489,12 +491,12 @@@ ofproto_sflow_received(struct ofproto_s n_actions, msg->length, min_size); return; } - actions = (const union odp_action *) (hdr + 1); + actions = (const union xflow_action *) (hdr + 1); /* Get packet payload and extract flow. 
*/ - payload.data = (union odp_action *) (actions + n_actions); + payload.data = (union xflow_action *) (actions + n_actions); payload.size = msg->length - min_size; - flow_extract(&payload, msg->port, &flow); + flow_extract(&payload, 0, msg->port, &flow); /* Build a flow sample */ memset(&fs, 0, sizeof fs); diff --cc ofproto/ofproto.c index 344246102,41977874b..2b26d6794 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@@ -99,23 -147,79 +100,62 @@@ rule_is_hidden(const struct wdp_rule *r return false; } -static struct rule *rule_create(struct ofproto *, struct rule *super, - const union ofp_action *, size_t n_actions, - uint16_t idle_timeout, uint16_t hard_timeout, - uint64_t flow_cookie, bool send_flow_removed); -static void rule_free(struct rule *); -static void rule_destroy(struct ofproto *, struct rule *); -static struct rule *rule_from_cls_rule(const struct cls_rule *); -static void rule_insert(struct ofproto *, struct rule *, - struct ofpbuf *packet, uint16_t in_port); -static void rule_remove(struct ofproto *, struct rule *); -static bool rule_make_actions(struct ofproto *, struct rule *, - const struct ofpbuf *packet); -static void rule_install(struct ofproto *, struct rule *, - struct rule *displaced_rule); -static void rule_uninstall(struct ofproto *, struct rule *); -static void rule_post_uninstall(struct ofproto *, struct rule *); -static void send_flow_removed(struct ofproto *p, struct rule *rule, - long long int now, uint8_t reason); +static void delete_flow(struct ofproto *, struct wdp_rule *, uint8_t reason); - struct ofconn { - struct list node; - struct rconn *rconn; - struct pktbuf *pktbuf; - int miss_send_len; - - struct rconn_packet_counter *packet_in_counter; + /* ofproto supports two kinds of OpenFlow connections: + * + * - "Controller connections": Connections to ordinary OpenFlow controllers. + * ofproto maintains persistent connections to these controllers and by + * default sends them asynchronous messages such as packet-ins. 
+ * + * - "Transient connections", e.g. from ovs-ofctl. When these connections + * drop, it is the other side's responsibility to reconnect them if + * necessary. ofproto does not send them asynchronous messages by default. + */ + enum ofconn_type { + OFCONN_CONTROLLER, /* An OpenFlow controller. */ + OFCONN_TRANSIENT /* A transient connection. */ + }; - /* Number of OpenFlow messages queued as replies to OpenFlow requests, and - * the maximum number before we stop reading OpenFlow requests. */ + /* An OpenFlow connection. */ + struct ofconn { + struct ofproto *ofproto; /* The ofproto that owns this connection. */ + struct list node; /* In struct ofproto's "all_conns" list. */ + struct rconn *rconn; /* OpenFlow connection. */ + enum ofconn_type type; /* Type. */ + + /* OFPT_PACKET_IN related data. */ + struct rconn_packet_counter *packet_in_counter; /* # queued on 'rconn'. */ + struct pinsched *schedulers[2]; /* Indexed by reason code; see below. */ + struct pktbuf *pktbuf; /* OpenFlow packet buffers. */ + int miss_send_len; /* Bytes to send of buffered packets. */ + + /* Number of OpenFlow messages queued on 'rconn' as replies to OpenFlow + * requests, and the maximum number before we stop reading OpenFlow + * requests. */ #define OFCONN_REPLY_MAX 100 struct rconn_packet_counter *reply_counter; + + /* type == OFCONN_CONTROLLER only. */ + enum nx_role role; /* Role. */ + struct hmap_node hmap_node; /* In struct ofproto's "controllers" map. */ + struct discovery *discovery; /* Controller discovery object, if enabled. */ + struct status_category *ss; /* Switch status category. */ + enum ofproto_band band; /* In-band or out-of-band? */ }; - static struct ofconn *ofconn_create(struct ofproto *, struct rconn *); + /* We use OFPR_NO_MATCH and OFPR_ACTION as indexes into struct ofconn's + * "schedulers" array. Their values are 0 and 1, and their meanings and values - * coincide with _ODPL_MISS_NR and _ODPL_ACTION_NR, so this is convenient. 
In ++ * coincide with WDP_CHAN_MISS and WDP_CHAN_ACTION, so this is convenient. In + * case anything ever changes, check their values here. */ + #define N_SCHEDULERS 2 + BUILD_ASSERT_DECL(OFPR_NO_MATCH == 0); -BUILD_ASSERT_DECL(OFPR_NO_MATCH == _ODPL_MISS_NR); ++BUILD_ASSERT_DECL(OFPR_NO_MATCH == WDP_CHAN_MISS); + BUILD_ASSERT_DECL(OFPR_ACTION == 1); -BUILD_ASSERT_DECL(OFPR_ACTION == _ODPL_ACTION_NR); ++BUILD_ASSERT_DECL(OFPR_ACTION == WDP_CHAN_ACTION); + + static struct ofconn *ofconn_create(struct ofproto *, struct rconn *, + enum ofconn_type); static void ofconn_destroy(struct ofconn *); static void ofconn_run(struct ofconn *, struct ofproto *); static void ofconn_wait(struct ofconn *); @@@ -122,6 -228,9 +164,9 @@@ static bool ofconn_receives_async_msgs( static void queue_tx(struct ofpbuf *msg, const struct ofconn *ofconn, struct rconn_packet_counter *counter); -static void send_packet_in(struct ofproto *, struct ofpbuf *odp_msg); -static void do_send_packet_in(struct ofpbuf *odp_msg, void *ofconn); ++static void send_packet_in(struct ofproto *, struct wdp_packet *); ++static void do_send_packet_in(struct wdp_packet *, void *ofconn); + struct ofproto { /* Settings. */ uint64_t datapath_id; /* Datapath ID. */ @@@ -138,17 -251,26 +183,20 @@@ /* Configuration. */ struct switch_status *switch_status; - struct status_category *ss_cat; - struct in_band *in_band; - struct discovery *discovery; struct fail_open *fail_open; - struct pinsched *miss_sched, *action_sched; struct netflow *netflow; struct ofproto_sflow *sflow; ++ bool tun_id_from_cookie; + + /* In-band control. */ + struct in_band *in_band; + long long int next_in_band_update; + struct sockaddr_in *extra_in_band_remotes; + size_t n_extra_remotes; - /* Flow table. */ - struct classifier cls; - bool need_revalidate; - long long int next_expiration; - struct tag_set revalidate_set; - bool tun_id_from_cookie; - /* OpenFlow connections. 
*/ - struct list all_conns; - struct ofconn *controller; + struct hmap controllers; /* Controller "struct ofconn"s. */ + struct list all_conns; /* Contains "struct ofconn"s. */ struct pvconn **listeners; size_t n_listeners; struct pvconn **snoops; @@@ -168,10 -290,16 +216,8 @@@ static const struct ofhooks default_ofh static uint64_t pick_datapath_id(const struct ofproto *); static uint64_t pick_fallback_dpid(void); - static void send_packet_in_miss(struct wdp_packet *, void *ofproto); - static void send_packet_in_action(struct wdp_packet *, void *ofproto); -static void update_used(struct ofproto *); -static void update_stats(struct ofproto *, struct rule *, - const struct odp_flow_stats *); -static void expire_rule(struct cls_rule *, void *ofproto); -static void active_timeout(struct ofproto *ofproto, struct rule *rule); -static bool revalidate_rule(struct ofproto *p, struct rule *rule); -static void revalidate_cb(struct cls_rule *rule_, void *p_); - -static void handle_odp_msg(struct ofproto *, struct ofpbuf *); +static void handle_wdp_packet(struct ofproto *, struct wdp_packet *); static void handle_openflow(struct ofconn *, struct ofproto *, struct ofpbuf *); @@@ -235,12 -369,15 +280,9 @@@ ofproto_create(const char *datapath, co p->netflow = NULL; p->sflow = NULL; - /* Initialize flow table. */ - classifier_init(&p->cls); - p->need_revalidate = false; - p->next_expiration = time_msec() + 1000; - tag_set_init(&p->revalidate_set); - /* Initialize OpenFlow connections. */ list_init(&p->all_conns); - p->controller = ofconn_create(p, rconn_create(5, 8)); - p->controller->pktbuf = pktbuf_create(); - p->controller->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; + hmap_init(&p->controllers); p->listeners = NULL; p->n_listeners = 0; p->snoops = NULL; @@@ -275,9 -408,175 +313,175 @@@ ofproto_set_datapath_id(struct ofproto uint64_t old_dpid = p->datapath_id; p->datapath_id = datapath_id ? 
datapath_id : pick_datapath_id(p); if (p->datapath_id != old_dpid) { + struct ofconn *ofconn; + VLOG_INFO("datapath ID changed to %016"PRIx64, p->datapath_id); - rconn_reconnect(p->controller->rconn); + + /* Force all active connections to reconnect, since there is no way to + * notify a controller that the datapath ID has changed. */ + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + rconn_reconnect(ofconn->rconn); + } + } + } + + static bool + is_discovery_controller(const struct ofproto_controller *c) + { + return !strcmp(c->target, "discover"); + } + + static bool + is_in_band_controller(const struct ofproto_controller *c) + { + return is_discovery_controller(c) || c->band == OFPROTO_IN_BAND; + } + + /* Creates a new controller in 'ofproto'. Some of the settings are initially + * drawn from 'c', but update_controller() needs to be called later to finish + * the new ofconn's configuration. */ + static void + add_controller(struct ofproto *ofproto, const struct ofproto_controller *c) + { + struct discovery *discovery; + struct ofconn *ofconn; + + if (is_discovery_controller(c)) { + int error = discovery_create(c->accept_re, c->update_resolv_conf, - ofproto->dpif, ofproto->switch_status, ++ ofproto->wdp, ofproto->switch_status, + &discovery); + if (error) { + return; + } + } else { + discovery = NULL; + } + + ofconn = ofconn_create(ofproto, rconn_create(5, 8), OFCONN_CONTROLLER); + ofconn->pktbuf = pktbuf_create(); + ofconn->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN; + if (discovery) { + ofconn->discovery = discovery; + } else { + rconn_connect(ofconn->rconn, c->target); + } + hmap_insert(&ofproto->controllers, &ofconn->hmap_node, + hash_string(c->target, 0)); + } + + /* Reconfigures 'ofconn' to match 'c'. This function cannot update an ofconn's + * target or turn discovery on or off (these are done by creating new ofconns + * and deleting old ones), but it can update the rest of an ofconn's + * settings. 
*/ + static void + update_controller(struct ofconn *ofconn, const struct ofproto_controller *c) + { + struct ofproto *ofproto = ofconn->ofproto; + int probe_interval; + int i; + + ofconn->band = (is_in_band_controller(c) + ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND); + + rconn_set_max_backoff(ofconn->rconn, c->max_backoff); + + probe_interval = c->probe_interval ? MAX(c->probe_interval, 5) : 0; + rconn_set_probe_interval(ofconn->rconn, probe_interval); + + if (ofconn->discovery) { + discovery_set_update_resolv_conf(ofconn->discovery, + c->update_resolv_conf); + discovery_set_accept_controller_re(ofconn->discovery, c->accept_re); + } + + for (i = 0; i < N_SCHEDULERS; i++) { + struct pinsched **s = &ofconn->schedulers[i]; + + if (c->rate_limit > 0) { + if (!*s) { + *s = pinsched_create(c->rate_limit, c->burst_limit, + ofproto->switch_status); + } else { + pinsched_set_limits(*s, c->rate_limit, c->burst_limit); + } + } else { + pinsched_destroy(*s); + *s = NULL; + } + } + } + + static const char * + ofconn_get_target(const struct ofconn *ofconn) + { + return ofconn->discovery ? "discover" : rconn_get_name(ofconn->rconn); + } + + static struct ofconn * + find_controller_by_target(struct ofproto *ofproto, const char *target) + { + struct ofconn *ofconn; + + HMAP_FOR_EACH_WITH_HASH (ofconn, struct ofconn, hmap_node, + hash_string(target, 0), &ofproto->controllers) { + if (!strcmp(ofconn_get_target(ofconn), target)) { + return ofconn; + } } + return NULL; + } + + static void + update_in_band_remotes(struct ofproto *ofproto) + { + const struct ofconn *ofconn; + struct sockaddr_in *addrs; + size_t max_addrs, n_addrs; + bool discovery; + size_t i; + + /* Allocate enough memory for as many remotes as we could possibly have. */ + max_addrs = ofproto->n_extra_remotes + hmap_count(&ofproto->controllers); + addrs = xmalloc(max_addrs * sizeof *addrs); + n_addrs = 0; + + /* Add all the remotes. 
*/ + discovery = false; + HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) { + struct sockaddr_in *sin = &addrs[n_addrs]; + + sin->sin_addr.s_addr = rconn_get_remote_ip(ofconn->rconn); + if (sin->sin_addr.s_addr) { + sin->sin_port = rconn_get_remote_port(ofconn->rconn); + n_addrs++; + } + if (ofconn->discovery) { + discovery = true; + } + } + for (i = 0; i < ofproto->n_extra_remotes; i++) { + addrs[n_addrs++] = ofproto->extra_in_band_remotes[i]; + } + + /* Create or update or destroy in-band. + * + * Ordinarily we only enable in-band if there's at least one remote + * address, but discovery needs the in-band rules for DHCP to be installed + * even before we know any remote addresses. */ + if (n_addrs || discovery) { + if (!ofproto->in_band) { - in_band_create(ofproto, ofproto->dpif, ofproto->switch_status, ++ in_band_create(ofproto, ofproto->wdp, ofproto->switch_status, + &ofproto->in_band); + } + in_band_set_remotes(ofproto->in_band, addrs, n_addrs); + ofproto->next_in_band_update = time_msec() + 1000; + } else { + in_band_destroy(ofproto->in_band); + ofproto->in_band = NULL; + } + + /* Clean up. */ + free(addrs); } void @@@ -614,23 -891,31 +787,25 @@@ ofproto_destroy(struct ofproto *p return; } - /* Destroy fail-open early, because it touches the classifier. */ - ofproto_set_failure(p, false); + /* Destroy fail-open and in-band early, since they touch the classifier. 
*/ + fail_open_destroy(p->fail_open); + p->fail_open = NULL; + + in_band_destroy(p->in_band); + p->in_band = NULL; + free(p->extra_in_band_remotes); ofproto_flush_flows(p); - classifier_destroy(&p->cls); LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node, &p->all_conns) { ofconn_destroy(ofconn); } + hmap_destroy(&p->controllers); - dpif_close(p->dpif); - netdev_monitor_destroy(p->netdev_monitor); - PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) { - ofport_free(ofport); - } - shash_destroy(&p->port_by_name); + wdp_close(p->wdp); switch_status_destroy(p->switch_status); - in_band_destroy(p->in_band); - discovery_destroy(p->discovery); - pinsched_destroy(p->miss_sched); - pinsched_destroy(p->action_sched); netflow_destroy(p->netflow); ofproto_sflow_destroy(p->sflow); @@@ -667,6 -952,60 +840,49 @@@ ofproto_run(struct ofproto *p return error; } -static void -process_port_change(struct ofproto *ofproto, int error, char *devname) -{ - if (error == ENOBUFS) { - reinit_ports(ofproto); - } else if (!error) { - update_port(ofproto, devname); - free(devname); - } -} - + /* Returns a "preference level" for snooping 'ofconn'. A higher return value + * means that 'ofconn' is more interesting for monitoring than a lower return + * value. */ + static int + snoop_preference(const struct ofconn *ofconn) + { + switch (ofconn->role) { + case NX_ROLE_MASTER: + return 3; + case NX_ROLE_OTHER: + return 2; + case NX_ROLE_SLAVE: + return 1; + default: + /* Shouldn't happen. */ + return 0; + } + } + + /* One of ofproto's "snoop" pvconns has accepted a new connection on 'vconn'. + * Connects this vconn to a controller. */ + static void + add_snooper(struct ofproto *ofproto, struct vconn *vconn) + { + struct ofconn *ofconn, *best; + + /* Pick a controller for monitoring. 
*/ + best = NULL; + LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) { + if (ofconn->type == OFCONN_CONTROLLER + && (!best || snoop_preference(ofconn) > snoop_preference(best))) { + best = ofconn; + } + } + + if (best) { + rconn_add_monitor(best->rconn, vconn); + } else { + VLOG_INFO_RL(&rl, "no controller connection to snoop"); + vconn_close(vconn); + } + } + int ofproto_run1(struct ofproto *p) { @@@ -869,15 -1278,317 +1070,18 @@@ ofproto_flush_flows(struct ofproto *ofp } } -static void -reinit_ports(struct ofproto *p) -{ - struct svec devnames; - struct ofport *ofport; - unsigned int port_no; - struct odp_port *odp_ports; - size_t n_odp_ports; - size_t i; - - svec_init(&devnames); - PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) { - svec_add (&devnames, (char *) ofport->opp.name); - } - dpif_port_list(p->dpif, &odp_ports, &n_odp_ports); - for (i = 0; i < n_odp_ports; i++) { - svec_add (&devnames, odp_ports[i].devname); - } - free(odp_ports); - - svec_sort_unique(&devnames); - for (i = 0; i < devnames.n; i++) { - update_port(p, devnames.names[i]); - } - svec_destroy(&devnames); -} - -static size_t -refresh_port_group(struct ofproto *p, unsigned int group) -{ - uint16_t *ports; - size_t n_ports; - struct ofport *port; - unsigned int port_no; - - assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD); - - ports = xmalloc(port_array_count(&p->ports) * sizeof *ports); - n_ports = 0; - PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { - if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { - ports[n_ports++] = port_no; - } - } - dpif_port_group_set(p->dpif, group, ports, n_ports); - free(ports); - - return n_ports; -} - -static void -refresh_port_groups(struct ofproto *p) -{ - size_t n_flood = refresh_port_group(p, DP_GROUP_FLOOD); - size_t n_all = refresh_port_group(p, DP_GROUP_ALL); - if (p->sflow) { - ofproto_sflow_set_group_sizes(p->sflow, n_flood, n_all); - } -} - -static struct ofport * -make_ofport(const struct odp_port 
*odp_port) -{ - struct netdev_options netdev_options; - enum netdev_flags flags; - struct ofport *ofport; - struct netdev *netdev; - bool carrier; - int error; - - memset(&netdev_options, 0, sizeof netdev_options); - netdev_options.name = odp_port->devname; - netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; - netdev_options.may_open = true; - - error = netdev_open(&netdev_options, &netdev); - if (error) { - VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s " - "cannot be opened (%s)", - odp_port->devname, odp_port->port, - odp_port->devname, strerror(error)); - return NULL; - } - - ofport = xmalloc(sizeof *ofport); - ofport->netdev = netdev; - ofport->opp.port_no = odp_port_to_ofp_port(odp_port->port); - netdev_get_etheraddr(netdev, ofport->opp.hw_addr); - memcpy(ofport->opp.name, odp_port->devname, - MIN(sizeof ofport->opp.name, sizeof odp_port->devname)); - ofport->opp.name[sizeof ofport->opp.name - 1] = '\0'; - - netdev_get_flags(netdev, &flags); - ofport->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN; - - netdev_get_carrier(netdev, &carrier); - ofport->opp.state = carrier ? 0 : OFPPS_LINK_DOWN; - - netdev_get_features(netdev, - &ofport->opp.curr, &ofport->opp.advertised, - &ofport->opp.supported, &ofport->opp.peer); - return ofport; -} - -static bool -ofport_conflicts(const struct ofproto *p, const struct odp_port *odp_port) -{ - if (port_array_get(&p->ports, odp_port->port)) { - VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath", - odp_port->port); - return true; - } else if (shash_find(&p->port_by_name, odp_port->devname)) { - VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath", - odp_port->devname); - return true; - } else { - return false; - } -} - -static int -ofport_equal(const struct ofport *a_, const struct ofport *b_) -{ - const struct ofp_phy_port *a = &a_->opp; - const struct ofp_phy_port *b = &b_->opp; - - BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. 
*/ - return (a->port_no == b->port_no - && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) - && !strcmp((char *) a->name, (char *) b->name) - && a->state == b->state - && a->config == b->config - && a->curr == b->curr - && a->advertised == b->advertised - && a->supported == b->supported - && a->peer == b->peer); -} - -static void -send_port_status(struct ofproto *p, const struct ofport *ofport, - uint8_t reason) -{ - /* XXX Should limit the number of queued port status change messages. */ - struct ofconn *ofconn; - LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { - struct ofp_port_status *ops; - struct ofpbuf *b; - - if (!ofconn_receives_async_msgs(ofconn)) { - continue; - } - - ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &b); - ops->reason = reason; - ops->desc = ofport->opp; - hton_ofp_phy_port(&ops->desc); - queue_tx(b, ofconn, NULL); - } - if (p->ofhooks->port_changed_cb) { - p->ofhooks->port_changed_cb(reason, &ofport->opp, p->aux); - } -} - -static void -ofport_install(struct ofproto *p, struct ofport *ofport) -{ - uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no); - const char *netdev_name = (const char *) ofport->opp.name; - - netdev_monitor_add(p->netdev_monitor, ofport->netdev); - port_array_set(&p->ports, odp_port, ofport); - shash_add(&p->port_by_name, netdev_name, ofport); - if (p->sflow) { - ofproto_sflow_add_port(p->sflow, odp_port, netdev_name); - } -} - -static void -ofport_remove(struct ofproto *p, struct ofport *ofport) -{ - uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no); - - netdev_monitor_remove(p->netdev_monitor, ofport->netdev); - port_array_set(&p->ports, odp_port, NULL); - shash_delete(&p->port_by_name, - shash_find(&p->port_by_name, (char *) ofport->opp.name)); - if (p->sflow) { - ofproto_sflow_del_port(p->sflow, odp_port); - } -} - -static void -ofport_free(struct ofport *ofport) -{ - if (ofport) { - netdev_close(ofport->netdev); - free(ofport); - } -} - -static void 
-update_port(struct ofproto *p, const char *devname) -{ - struct odp_port odp_port; - struct ofport *old_ofport; - struct ofport *new_ofport; - int error; - - COVERAGE_INC(ofproto_update_port); - - /* Query the datapath for port information. */ - error = dpif_port_query_by_name(p->dpif, devname, &odp_port); - - /* Find the old ofport. */ - old_ofport = shash_find_data(&p->port_by_name, devname); - if (!error) { - if (!old_ofport) { - /* There's no port named 'devname' but there might be a port with - * the same port number. This could happen if a port is deleted - * and then a new one added in its place very quickly, or if a port - * is renamed. In the former case we want to send an OFPPR_DELETE - * and an OFPPR_ADD, and in the latter case we want to send a - * single OFPPR_MODIFY. We can distinguish the cases by comparing - * the old port's ifindex against the new port, or perhaps less - * reliably but more portably by comparing the old port's MAC - * against the new port's MAC. However, this code isn't that smart - * and always sends an OFPPR_MODIFY (XXX). */ - old_ofport = port_array_get(&p->ports, odp_port.port); - } - } else if (error != ENOENT && error != ENODEV) { - VLOG_WARN_RL(&rl, "dpif_port_query_by_name returned unexpected error " - "%s", strerror(error)); - return; - } - - /* Create a new ofport. */ - new_ofport = !error ? make_ofport(&odp_port) : NULL; - - /* Eliminate a few pathological cases. */ - if (!old_ofport && !new_ofport) { - return; - } else if (old_ofport && new_ofport) { - /* Most of the 'config' bits are OpenFlow soft state, but - * OFPPC_PORT_DOWN is maintained the kernel. So transfer the OpenFlow - * bits from old_ofport. (make_ofport() only sets OFPPC_PORT_DOWN and - * leaves the other bits 0.) */ - new_ofport->opp.config |= old_ofport->opp.config & ~OFPPC_PORT_DOWN; - - if (ofport_equal(old_ofport, new_ofport)) { - /* False alarm--no change. */ - ofport_free(new_ofport); - return; - } - } - - /* Now deal with the normal cases. 
*/ - if (old_ofport) { - ofport_remove(p, old_ofport); - } - if (new_ofport) { - ofport_install(p, new_ofport); - } - send_port_status(p, new_ofport ? new_ofport : old_ofport, - (!old_ofport ? OFPPR_ADD - : !new_ofport ? OFPPR_DELETE - : OFPPR_MODIFY)); - ofport_free(old_ofport); - - /* Update port groups. */ - refresh_port_groups(p); -} - -static int -init_ports(struct ofproto *p) -{ - struct odp_port *ports; - size_t n_ports; - size_t i; - int error; - - error = dpif_port_list(p->dpif, &ports, &n_ports); - if (error) { - return error; - } - - for (i = 0; i < n_ports; i++) { - const struct odp_port *odp_port = &ports[i]; - if (!ofport_conflicts(p, odp_port)) { - struct ofport *ofport = make_ofport(odp_port); - if (ofport) { - ofport_install(p, ofport); - } - } - } - free(ports); - refresh_port_groups(p); - return 0; -} - static struct ofconn * - ofconn_create(struct ofproto *p, struct rconn *rconn) + ofconn_create(struct ofproto *p, struct rconn *rconn, enum ofconn_type type) { - struct ofconn *ofconn = xmalloc(sizeof *ofconn); + struct ofconn *ofconn = xzalloc(sizeof *ofconn); + ofconn->ofproto = p; list_push_back(&p->all_conns, &ofconn->node); ofconn->rconn = rconn; + ofconn->type = type; + ofconn->role = NX_ROLE_OTHER; + ofconn->packet_in_counter = rconn_packet_counter_create (); ofconn->pktbuf = NULL; ofconn->miss_send_len = 0; - ofconn->packet_in_counter = rconn_packet_counter_create (); ofconn->reply_counter = rconn_packet_counter_create (); return ofconn; } @@@ -931,9 -1675,89 +1168,25 @@@ ofconn_wait(struct ofconn *ofconn COVERAGE_INC(ofproto_ofconn_stuck); } } + + /* Returns true if 'ofconn' should receive asynchronous messages. */ + static bool + ofconn_receives_async_msgs(const struct ofconn *ofconn) + { + if (ofconn->type == OFCONN_CONTROLLER) { + /* Ordinary controllers always get asynchronous messages unless they + * have configured themselves as "slaves". 
*/ + return ofconn->role != NX_ROLE_SLAVE; + } else { + /* Transient connections don't get asynchronous messages unless they + * have explicitly asked for them by setting a nonzero miss send + * length. */ + return ofconn->miss_send_len > 0; + } + } -/* Caller is responsible for initializing the 'cr' member of the returned - * rule. */ -static struct rule * -rule_create(struct ofproto *ofproto, struct rule *super, - const union ofp_action *actions, size_t n_actions, - uint16_t idle_timeout, uint16_t hard_timeout, - uint64_t flow_cookie, bool send_flow_removed) -{ - struct rule *rule = xzalloc(sizeof *rule); - rule->idle_timeout = idle_timeout; - rule->hard_timeout = hard_timeout; - rule->flow_cookie = flow_cookie; - rule->used = rule->created = time_msec(); - rule->send_flow_removed = send_flow_removed; - rule->super = super; - if (super) { - list_push_back(&super->list, &rule->list); - } else { - list_init(&rule->list); - } - rule->n_actions = n_actions; - rule->actions = xmemdup(actions, n_actions * sizeof *actions); - netflow_flow_clear(&rule->nf_flow); - netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created); - - return rule; -} - -static struct rule * -rule_from_cls_rule(const struct cls_rule *cls_rule) -{ - return cls_rule ? CONTAINER_OF(cls_rule, struct rule, cr) : NULL; -} - -static void -rule_free(struct rule *rule) -{ - free(rule->actions); - free(rule->odp_actions); - free(rule); -} - -/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its - * super-rule's list of subrules. If 'rule' is a super-rule, also iterates - * through all of its subrules and revalidates them, destroying any that no - * longer has a super-rule (which is probably all of them). - * - * Before calling this function, the caller must make have removed 'rule' from - * the classifier. If 'rule' is an exact-match rule, the caller is also - * responsible for ensuring that it has been uninstalled from the datapath. 
*/ -static void -rule_destroy(struct ofproto *ofproto, struct rule *rule) -{ - if (!rule->super) { - struct rule *subrule, *next; - LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) { - revalidate_rule(ofproto, subrule); - } - } else { - list_remove(&rule->list); - } - rule_free(rule); -} - static bool -rule_has_out_port(const struct rule *rule, uint16_t out_port) +rule_has_out_port(const struct wdp_rule *rule, uint16_t out_port) { const union ofp_action *oa; struct actions_iterator i; @@@ -994,99 -2135,424 +1247,91 @@@ static in handle_echo_request(struct ofconn *ofconn, struct ofp_header *oh) { struct ofp_header *rq = oh; - queue_tx(make_echo_reply(rq), ofconn, ofconn->reply_counter); - return 0; -} - -static int -handle_features_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) -{ - struct ofp_switch_features *osf; - struct ofpbuf *buf; - unsigned int port_no; - struct ofport *port; - - osf = make_openflow_xid(sizeof *osf, OFPT_FEATURES_REPLY, oh->xid, &buf); - osf->datapath_id = htonll(p->datapath_id); - osf->n_buffers = htonl(pktbuf_capacity()); - osf->n_tables = 2; - osf->capabilities = htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS | - OFPC_PORT_STATS | OFPC_ARP_MATCH_IP); - osf->actions = htonl((1u << OFPAT_OUTPUT) | - (1u << OFPAT_SET_VLAN_VID) | - (1u << OFPAT_SET_VLAN_PCP) | - (1u << OFPAT_STRIP_VLAN) | - (1u << OFPAT_SET_DL_SRC) | - (1u << OFPAT_SET_DL_DST) | - (1u << OFPAT_SET_NW_SRC) | - (1u << OFPAT_SET_NW_DST) | - (1u << OFPAT_SET_NW_TOS) | - (1u << OFPAT_SET_TP_SRC) | - (1u << OFPAT_SET_TP_DST)); - - PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { - hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp)); - } - - queue_tx(buf, ofconn, ofconn->reply_counter); - return 0; -} - -static int -handle_get_config_request(struct ofproto *p, struct ofconn *ofconn, - struct ofp_header *oh) -{ - struct ofpbuf *buf; - struct ofp_switch_config *osc; - uint16_t flags; - bool drop_frags; - - /* Figure out flags. 
*/ - dpif_get_drop_frags(p->dpif, &drop_frags); - flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL; - - /* Send reply. */ - osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf); - osc->flags = htons(flags); - osc->miss_send_len = htons(ofconn->miss_send_len); - queue_tx(buf, ofconn, ofconn->reply_counter); - - return 0; -} - -static int -handle_set_config(struct ofproto *p, struct ofconn *ofconn, - struct ofp_switch_config *osc) -{ - uint16_t flags; - int error; - - error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc); - if (error) { - return error; - } - flags = ntohs(osc->flags); - - if (ofconn->type == OFCONN_CONTROLLER && ofconn->role != NX_ROLE_SLAVE) { - switch (flags & OFPC_FRAG_MASK) { - case OFPC_FRAG_NORMAL: - dpif_set_drop_frags(p->dpif, false); - break; - case OFPC_FRAG_DROP: - dpif_set_drop_frags(p->dpif, true); - break; - default: - VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")", - osc->flags); - break; - } - } - - ofconn->miss_send_len = ntohs(osc->miss_send_len); - - return 0; -} - -static void -add_output_group_action(struct odp_actions *actions, uint16_t group, - uint16_t *nf_output_iface) -{ - odp_actions_add(actions, ODPAT_OUTPUT_GROUP)->output_group.group = group; - - if (group == DP_GROUP_ALL || group == DP_GROUP_FLOOD) { - *nf_output_iface = NF_OUT_FLOOD; - } -} - -static void -add_controller_action(struct odp_actions *actions, - const struct ofp_action_output *oao) -{ - union odp_action *a = odp_actions_add(actions, ODPAT_CONTROLLER); - a->controller.arg = ntohs(oao->max_len); -} - -struct action_xlate_ctx { - /* Input. */ - flow_t flow; /* Flow to which these actions correspond. */ - int recurse; /* Recursion level, via xlate_table_action. */ - struct ofproto *ofproto; - const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a - * null pointer if we are revalidating - * without a packet to refer to. */ - - /* Output. 
*/ - struct odp_actions *out; /* Datapath actions. */ - tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ - bool may_set_up_flow; /* True ordinarily; false if the actions must - * be reassessed for every packet. */ - uint16_t nf_output_iface; /* Output interface index for NetFlow. */ -}; - -static void do_xlate_actions(const union ofp_action *in, size_t n_in, - struct action_xlate_ctx *ctx); - -static void -add_output_action(struct action_xlate_ctx *ctx, uint16_t port) -{ - const struct ofport *ofport = port_array_get(&ctx->ofproto->ports, port); - - if (ofport) { - if (ofport->opp.config & OFPPC_NO_FWD) { - /* Forwarding disabled on port. */ - return; - } - } else { - /* - * We don't have an ofport record for this port, but it doesn't hurt to - * allow forwarding to it anyhow. Maybe such a port will appear later - * and we're pre-populating the flow table. - */ - } - - odp_actions_add(ctx->out, ODPAT_OUTPUT)->output.port = port; - ctx->nf_output_iface = port; -} - -static struct rule * -lookup_valid_rule(struct ofproto *ofproto, const flow_t *flow) -{ - struct rule *rule; - rule = rule_from_cls_rule(classifier_lookup(&ofproto->cls, flow)); - - /* The rule we found might not be valid, since we could be in need of - * revalidation. If it is not valid, don't return it. */ - if (rule - && rule->super - && ofproto->need_revalidate - && !revalidate_rule(ofproto, rule)) { - COVERAGE_INC(ofproto_invalidated); - return NULL; - } - - return rule; -} - -static void -xlate_table_action(struct action_xlate_ctx *ctx, uint16_t in_port) -{ - if (!ctx->recurse) { - uint16_t old_in_port; - struct rule *rule; - - /* Look up a flow with 'in_port' as the input port. Then restore the - * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will - * have surprising behavior). 
*/ - old_in_port = ctx->flow.in_port; - ctx->flow.in_port = in_port; - rule = lookup_valid_rule(ctx->ofproto, &ctx->flow); - ctx->flow.in_port = old_in_port; - - if (rule) { - if (rule->super) { - rule = rule->super; - } - - ctx->recurse++; - do_xlate_actions(rule->actions, rule->n_actions, ctx); - ctx->recurse--; - } - } -} - -static void -xlate_output_action(struct action_xlate_ctx *ctx, - const struct ofp_action_output *oao) -{ - uint16_t odp_port; - uint16_t prev_nf_output_iface = ctx->nf_output_iface; - - ctx->nf_output_iface = NF_OUT_DROP; - - switch (ntohs(oao->port)) { - case OFPP_IN_PORT: - add_output_action(ctx, ctx->flow.in_port); - break; - case OFPP_TABLE: - xlate_table_action(ctx, ctx->flow.in_port); - break; - case OFPP_NORMAL: - if (!ctx->ofproto->ofhooks->normal_cb(&ctx->flow, ctx->packet, - ctx->out, ctx->tags, - &ctx->nf_output_iface, - ctx->ofproto->aux)) { - COVERAGE_INC(ofproto_uninstallable); - ctx->may_set_up_flow = false; - } - break; - case OFPP_FLOOD: - add_output_group_action(ctx->out, DP_GROUP_FLOOD, - &ctx->nf_output_iface); - break; - case OFPP_ALL: - add_output_group_action(ctx->out, DP_GROUP_ALL, &ctx->nf_output_iface); - break; - case OFPP_CONTROLLER: - add_controller_action(ctx->out, oao); - break; - case OFPP_LOCAL: - add_output_action(ctx, ODPP_LOCAL); - break; - default: - odp_port = ofp_port_to_odp_port(ntohs(oao->port)); - if (odp_port != ctx->flow.in_port) { - add_output_action(ctx, odp_port); - } - break; - } - - if (prev_nf_output_iface == NF_OUT_FLOOD) { - ctx->nf_output_iface = NF_OUT_FLOOD; - } else if (ctx->nf_output_iface == NF_OUT_DROP) { - ctx->nf_output_iface = prev_nf_output_iface; - } else if (prev_nf_output_iface != NF_OUT_DROP && - ctx->nf_output_iface != NF_OUT_FLOOD) { - ctx->nf_output_iface = NF_OUT_MULTI; - } -} - -static void -xlate_nicira_action(struct action_xlate_ctx *ctx, - const struct nx_action_header *nah) -{ - const struct nx_action_resubmit *nar; - const struct nx_action_set_tunnel *nast; - union 
odp_action *oa; - int subtype = ntohs(nah->subtype); - - assert(nah->vendor == htonl(NX_VENDOR_ID)); - switch (subtype) { - case NXAST_RESUBMIT: - nar = (const struct nx_action_resubmit *) nah; - xlate_table_action(ctx, ofp_port_to_odp_port(ntohs(nar->in_port))); - break; - - case NXAST_SET_TUNNEL: - nast = (const struct nx_action_set_tunnel *) nah; - oa = odp_actions_add(ctx->out, ODPAT_SET_TUNNEL); - ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id; - break; - - /* If you add a new action here that modifies flow data, don't forget to - * update the flow key in ctx->flow in the same key. */ - - default: - VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype); - break; - } + queue_tx(make_echo_reply(rq), ofconn, ofconn->reply_counter); + return 0; } -static void -do_xlate_actions(const union ofp_action *in, size_t n_in, - struct action_xlate_ctx *ctx) +static int +handle_features_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) { - struct actions_iterator iter; - const union ofp_action *ia; - const struct ofport *port; - - port = port_array_get(&ctx->ofproto->ports, ctx->flow.in_port); - if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && - port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr) - ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) { - /* Drop this flow. 
*/ - return; - } + struct ofpbuf *features; + int error; - for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { - uint16_t type = ntohs(ia->type); - union odp_action *oa; + error = wdp_get_features(p->wdp, &features); + if (!error) { + struct ofp_switch_features *osf = features->data; - switch (type) { - case OFPAT_OUTPUT: - xlate_output_action(ctx, &ia->output); - break; + update_openflow_length(features); + osf->header.version = OFP_VERSION; + osf->header.type = OFPT_FEATURES_REPLY; + osf->header.xid = oh->xid; - case OFPAT_SET_VLAN_VID: - oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_VID); - ctx->flow.dl_vlan = oa->vlan_vid.vlan_vid = ia->vlan_vid.vlan_vid; - break; + osf->datapath_id = htonll(p->datapath_id); + osf->n_buffers = htonl(pktbuf_capacity()); + memset(osf->pad, 0, sizeof osf->pad); - case OFPAT_SET_VLAN_PCP: - oa = odp_actions_add(ctx->out, ODPAT_SET_VLAN_PCP); - ctx->flow.dl_vlan_pcp = oa->vlan_pcp.vlan_pcp = ia->vlan_pcp.vlan_pcp; - break; + /* Turn on capabilities implemented by ofproto. 
*/ + osf->capabilities |= htonl(OFPC_FLOW_STATS | OFPC_TABLE_STATS | + OFPC_PORT_STATS); - case OFPAT_STRIP_VLAN: - odp_actions_add(ctx->out, ODPAT_STRIP_VLAN); - ctx->flow.dl_vlan = OFP_VLAN_NONE; - ctx->flow.dl_vlan_pcp = 0; - break; + queue_tx(features, ofconn, ofconn->reply_counter); + } + return error; +} - case OFPAT_SET_DL_SRC: - oa = odp_actions_add(ctx->out, ODPAT_SET_DL_SRC); - memcpy(oa->dl_addr.dl_addr, - ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); - memcpy(ctx->flow.dl_src, - ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); - break; +static int +handle_get_config_request(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) +{ + struct ofpbuf *buf; + struct ofp_switch_config *osc; + uint16_t flags; + bool drop_frags; - case OFPAT_SET_DL_DST: - oa = odp_actions_add(ctx->out, ODPAT_SET_DL_DST); - memcpy(oa->dl_addr.dl_addr, - ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); - memcpy(ctx->flow.dl_dst, - ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); - break; + /* Figure out flags. */ + wdp_get_drop_frags(p->wdp, &drop_frags); + flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL; - case OFPAT_SET_NW_SRC: - oa = odp_actions_add(ctx->out, ODPAT_SET_NW_SRC); - ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; - break; + /* Send reply. 
*/ + osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf); + osc->flags = htons(flags); + osc->miss_send_len = htons(ofconn->miss_send_len); + queue_tx(buf, ofconn, ofconn->reply_counter); - case OFPAT_SET_NW_DST: - oa = odp_actions_add(ctx->out, ODPAT_SET_NW_DST); - ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; - break; + return 0; +} - case OFPAT_SET_NW_TOS: - oa = odp_actions_add(ctx->out, ODPAT_SET_NW_TOS); - ctx->flow.nw_tos = oa->nw_tos.nw_tos = ia->nw_tos.nw_tos; - break; +static int +handle_set_config(struct ofproto *p, struct ofconn *ofconn, + struct ofp_switch_config *osc) +{ + uint16_t flags; + int error; - case OFPAT_SET_TP_SRC: - oa = odp_actions_add(ctx->out, ODPAT_SET_TP_SRC); - ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port; - break; + error = check_ofp_message(&osc->header, OFPT_SET_CONFIG, sizeof *osc); + if (error) { + return error; + } + flags = ntohs(osc->flags); - if (ofconn == p->controller) { - case OFPAT_SET_TP_DST: - oa = odp_actions_add(ctx->out, ODPAT_SET_TP_DST); - ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port; ++ if (ofconn->type == OFCONN_CONTROLLER && ofconn->role != NX_ROLE_SLAVE) { + switch (flags & OFPC_FRAG_MASK) { + case OFPC_FRAG_NORMAL: + wdp_set_drop_frags(p->wdp, false); break; - - case OFPAT_VENDOR: - xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + case OFPC_FRAG_DROP: + wdp_set_drop_frags(p->wdp, true); break; - default: - VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type); + VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")", + osc->flags); break; } } -} - if ((ntohs(osc->miss_send_len) != 0) != (ofconn->miss_send_len != 0)) { - if (ntohs(osc->miss_send_len) != 0) { - ofconn->pktbuf = pktbuf_create(); - } else { - pktbuf_destroy(ofconn->pktbuf); - } -static int -xlate_actions(const union ofp_action *in, size_t n_in, - const flow_t *flow, struct ofproto *ofproto, - const struct ofpbuf *packet, - struct odp_actions *out, 
tag_type *tags, bool *may_set_up_flow, - uint16_t *nf_output_iface) -{ - tag_type no_tags = 0; - struct action_xlate_ctx ctx; - COVERAGE_INC(ofproto_ofp2odp); - odp_actions_init(out); - ctx.flow = *flow; - ctx.recurse = 0; - ctx.ofproto = ofproto; - ctx.packet = packet; - ctx.out = out; - ctx.tags = tags ? tags : &no_tags; - ctx.may_set_up_flow = true; - ctx.nf_output_iface = NF_OUT_DROP; - do_xlate_actions(in, n_in, &ctx); - - /* Check with in-band control to see if we're allowed to set up this - * flow. */ - if (!in_band_rule_check(ofproto->in_band, flow, out)) { - ctx.may_set_up_flow = false; -- } -- - if (may_set_up_flow) { - *may_set_up_flow = ctx.may_set_up_flow; - } - if (nf_output_iface) { - *nf_output_iface = ctx.nf_output_iface; - } - if (odp_actions_overflow(out)) { - odp_actions_init(out); - return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY); - } + ofconn->miss_send_len = ntohs(osc->miss_send_len); + return 0; } @@@ -1121,21 -2614,60 +1394,26 @@@ handle_packet_out(struct ofproto *p, st buffer = NULL; } - flow_extract(&payload, ntohs(opo->in_port), &flow); - flow_extract(&payload, 0, ofp_port_to_odp_port(ntohs(opo->in_port)), &flow); - error = xlate_actions((const union ofp_action *) opo->actions, n_actions, - &flow, p, &payload, &actions, NULL, NULL, NULL); - if (error) { - return error; - } - - dpif_execute(p->dpif, flow.in_port, actions.actions, actions.n_actions, - &payload); ++ flow_extract(&payload, 0, ntohs(opo->in_port), &flow); + wdp_execute(p->wdp, flow.in_port, (const union ofp_action *) actions, + n_actions, &payload); ofpbuf_delete(buffer); return 0; } -static void -update_port_config(struct ofproto *p, struct ofport *port, - uint32_t config, uint32_t mask) -{ - mask &= config ^ port->opp.config; - if (mask & OFPPC_PORT_DOWN) { - if (config & OFPPC_PORT_DOWN) { - netdev_turn_flags_off(port->netdev, NETDEV_UP, true); - } else { - netdev_turn_flags_on(port->netdev, NETDEV_UP, true); - } - } -#define REVALIDATE_BITS (OFPPC_NO_RECV | 
OFPPC_NO_RECV_STP | OFPPC_NO_FWD) - if (mask & REVALIDATE_BITS) { - COVERAGE_INC(ofproto_costly_flags); - port->opp.config ^= mask & REVALIDATE_BITS; - p->need_revalidate = true; - } -#undef REVALIDATE_BITS - if (mask & OFPPC_NO_FLOOD) { - port->opp.config ^= OFPPC_NO_FLOOD; - refresh_port_groups(p); - } - if (mask & OFPPC_NO_PACKET_IN) { - port->opp.config ^= OFPPC_NO_PACKET_IN; - } -} - static int - handle_port_mod(struct ofproto *p, struct ofp_header *oh) + handle_port_mod(struct ofproto *p, struct ofconn *ofconn, + struct ofp_header *oh) { const struct ofp_port_mod *opm; - struct ofport *port; + struct wdp_port port; int error; + error = reject_slave_controller(ofconn, oh); + if (error) { + return error; + } error = check_ofp_message(oh, OFPT_PORT_MOD, sizeof *opm); if (error) { return error; @@@ -1248,19 -2788,20 +1526,20 @@@ handle_table_stats_request(struct ofpro memset(ots, 0, sizeof *ots); ots->table_id = TABLEID_CLASSIFIER; strcpy(ots->name, "classifier"); - ots->wildcards = htonl(OFPFW_ALL); + ots->wildcards = p->tun_id_from_cookie ? htonl(OVSFW_ALL) + : htonl(OFPFW_ALL); - ots->max_entries = htonl(65536); - ots->active_count = htonl(n_wild); - ots->lookup_count = htonll(0); /* XXX */ - ots->matched_count = htonll(0); /* XXX */ + ots->max_entries = htonl(dpstats.wild.max_capacity); + ots->active_count = htonl(dpstats.wild.n_flows); + ots->lookup_count = htonll(dpstats.wild.n_hit + dpstats.wild.n_missed); + ots->matched_count = htonll(dpstats.wild.n_hit); queue_tx(msg, ofconn, ofconn->reply_counter); return 0; } static void -append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn, +append_port_stat(struct wdp_port *port, struct ofconn *ofconn, - struct ofpbuf *msg) + struct ofpbuf **msgp) { struct netdev_stats stats; struct ofp_port_stats *ops; @@@ -1270,8 -2811,8 +1549,8 @@@ * netdev_get_stats() will log errors. 
*/ netdev_get_stats(port->netdev, &stats); - ops = append_stats_reply(sizeof *ops, ofconn, &msg); + ops = append_stats_reply(sizeof *ops, ofconn, msgp); - ops->port_no = htons(odp_port_to_ofp_port(port_no)); + ops->port_no = htons(port->opp.port_no); memset(ops->pad, 0, sizeof ops->pad); ops->rx_packets = htonll(stats.rx_packets); ops->tx_packets = htonll(stats.tx_packets); @@@ -1303,22 -2846,15 +1582,22 @@@ handle_port_stats_request(struct ofprot msg = start_stats_reply(osr, sizeof *ops * 16); if (psr->port_no != htons(OFPP_NONE)) { - port = port_array_get(&p->ports, - ofp_port_to_odp_port(ntohs(psr->port_no))); - if (port) { - append_port_stat(port, ntohs(psr->port_no), ofconn, &msg); + struct wdp_port port; + + if (!wdp_port_query_by_number(p->wdp, ntohs(psr->port_no), &port)) { - append_port_stat(&port, ofconn, msg); ++ append_port_stat(&port, ofconn, &msg); + wdp_port_free(&port); } } else { - PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) { - append_port_stat(port, port_no, ofconn, &msg); + struct wdp_port *ports; + size_t n_ports; + size_t i; + + wdp_port_list(p->wdp, &ports, &n_ports); + for (i = 0; i < n_ports; i++) { - append_port_stat(&ports[i], ofconn, msg); ++ append_port_stat(&ports[i], ofconn, &msg); } + wdp_port_array_free(ports, n_ports); } queue_tx(msg, ofconn, ofconn->reply_counter); @@@ -1373,13 -2945,14 +1652,14 @@@ flow_stats_cb(struct wdp_rule *rule, vo ofs = append_stats_reply(len, cbdata->ofconn, &cbdata->msg); ofs->length = htons(len); - ofs->table_id = rule->cr.wc.wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH; + ofs->table_id = rule->cr.flow.wildcards ? 
TABLEID_CLASSIFIER : TABLEID_HASH; ofs->pad = 0; - flow_to_match(&rule->cr.flow, &ofs->match); - flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, - cbdata->ofproto->tun_id_from_cookie, &ofs->match); ++ flow_to_match(&rule->cr.flow, cbdata->ofproto->tun_id_from_cookie, ++ &ofs->match); ofs->duration_sec = htonl(sec); ofs->duration_nsec = htonl(msec * 1000000); - ofs->cookie = rule->flow_cookie; - ofs->priority = htons(rule->cr.priority); + ofs->cookie = ofproto_rule_cast(rule)->flow_cookie; + ofs->priority = htons(rule->cr.flow.priority); ofs->idle_timeout = htons(rule->idle_timeout); ofs->hard_timeout = htons(rule->hard_timeout); memset(ofs->pad2, 0, sizeof ofs->pad2); @@@ -1416,10 -2989,10 +1696,10 @@@ handle_flow_stats_request(struct ofprot cbdata.ofconn = ofconn; cbdata.out_port = fsr->out_port; cbdata.msg = start_stats_reply(osr, 1024); - flow_from_match(&target, 0, &fsr->match); - cls_rule_from_match(&fsr->match, 0, false, 0, &target); - classifier_for_each_match(&p->cls, &target, - table_id_to_include(fsr->table_id), - flow_stats_cb, &cbdata); ++ flow_from_match(&fsr->match, 0, false, 0, &target); + wdp_flow_for_each_match(p->wdp, &target, + table_id_to_include(fsr->table_id), + flow_stats_cb, &cbdata); queue_tx(cbdata.msg, ofconn, ofconn->reply_counter); return 0; } @@@ -1438,8 -3012,14 +1718,9 @@@ flow_stats_ds_cb(struct wdp_rule *rule uint64_t packet_count, byte_count; size_t act_len = sizeof *rule->actions * rule->n_actions; - /* Don't report on subrules. 
*/ - if (rule->super != NULL) { - return; - } - query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); - flow_to_match(&rule->cr.flow, &match); - flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, - cbdata->ofproto->tun_id_from_cookie, &match); ++ flow_to_match(&rule->cr.flow, cbdata->ofproto->tun_id_from_cookie, ++ &match); ds_put_format(results, "duration=%llds, ", (time_msec() - rule->created) / 1000); @@@ -1456,19 -3036,19 +1737,19 @@@ void ofproto_get_all_flows(struct ofproto *p, struct ds *results) { - struct ofp_match match; - struct cls_rule target; struct flow_stats_ds_cbdata cbdata; + struct ofp_match match; + flow_t target; memset(&match, 0, sizeof match); - match.wildcards = htonl(OFPFW_ALL); + match.wildcards = htonl(OVSFW_ALL); cbdata.ofproto = p; cbdata.results = results; - flow_from_match(&target, 0, &match); - cls_rule_from_match(&match, 0, false, 0, &target); - classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, - flow_stats_ds_cb, &cbdata); ++ flow_from_match(&match, 0, false, 0, &target); + wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL, + flow_stats_ds_cb, &cbdata); } struct aggregate_stats_cbdata { @@@ -1518,10 -3099,10 +1799,10 @@@ handle_aggregate_stats_request(struct o cbdata.packet_count = 0; cbdata.byte_count = 0; cbdata.n_flows = 0; - flow_from_match(&target, 0, &asr->match); - cls_rule_from_match(&asr->match, 0, false, 0, &target); - classifier_for_each_match(&p->cls, &target, - table_id_to_include(asr->table_id), - aggregate_stats_cb, &cbdata); ++ flow_from_match(&asr->match, 0, false, 0, &target); + wdp_flow_for_each_match(p->wdp, &target, + table_id_to_include(asr->table_id), + aggregate_stats_cb, &cbdata); msg = start_stats_reply(osr, sizeof *reply); reply = append_stats_reply(sizeof *reply, ofconn, &msg); @@@ -1571,139 -3152,311 +1852,266 @@@ handle_stats_request(struct ofproto *p } } -static long long int -msec_from_nsec(uint64_t sec, uint32_t nsec) -{ - return !sec ? 
0 : sec * 1000 + nsec / 1000000; -} - -static void -update_time(struct ofproto *ofproto, struct rule *rule, - const struct odp_flow_stats *stats) -{ - long long int used = msec_from_nsec(stats->used_sec, stats->used_nsec); - if (used > rule->used) { - rule->used = used; - if (rule->super && used > rule->super->used) { - rule->super->used = used; - } - netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used); - } -} - -static void -update_stats(struct ofproto *ofproto, struct rule *rule, - const struct odp_flow_stats *stats) -{ - if (stats->n_packets) { - update_time(ofproto, rule, stats); - rule->packet_count += stats->n_packets; - rule->byte_count += stats->n_bytes; - netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos, - stats->tcp_flags); - } -} - + /* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT + * in which no matching flow already exists in the flow table. + * + * Adds the flow specified by 'ofm', which is followed by 'n_actions' + * ofp_actions, to 'p''s flow table. Returns 0 on success or an OpenFlow error + * code as encoded by ofp_mkerr() on failure. + * + * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, + * if any. 
*/ static int add_flow(struct ofproto *p, struct ofconn *ofconn, - struct ofp_flow_mod *ofm, size_t n_actions) + const struct ofp_flow_mod *ofm, size_t n_actions) { + struct wdp_rule *rule; + struct wdp_flow_put put; struct ofpbuf *packet; - struct rule *rule; uint16_t in_port; + flow_t flow; int error; - flow_from_match(&flow, ntohs(ofm->priority), &ofm->match); - if (ofm->flags & htons(OFPFF_CHECK_OVERLAP)) { - flow_t flow; - uint32_t wildcards; - - flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie, - &flow, &wildcards); - if (classifier_rule_overlaps(&p->cls, &flow, wildcards, - ntohs(ofm->priority))) { - return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_OVERLAP); - } ++ flow_from_match(&ofm->match, ntohs(ofm->priority), p->tun_id_from_cookie, ++ ofm->cookie, &flow); + if (ofm->flags & htons(OFPFF_CHECK_OVERLAP) + && wdp_flow_overlaps(p->wdp, &flow)) { + return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_OVERLAP); } - rule = rule_create(p, NULL, (const union ofp_action *) ofm->actions, - n_actions, ntohs(ofm->idle_timeout), - ntohs(ofm->hard_timeout), ofm->cookie, - ofm->flags & htons(OFPFF_SEND_FLOW_REM)); - cls_rule_from_match(&ofm->match, ntohs(ofm->priority), - p->tun_id_from_cookie, ofm->cookie, &rule->cr); + put.flags = WDP_PUT_CREATE | WDP_PUT_MODIFY | WDP_PUT_ALL; + put.flow = &flow; + put.actions = (const union ofp_action *) ofm->actions; + put.n_actions = n_actions; + put.idle_timeout = ntohs(ofm->idle_timeout); + put.hard_timeout = ntohs(ofm->hard_timeout); + error = wdp_flow_put(p->wdp, &put, NULL, &rule); + if (error) { + /* XXX wdp_flow_put should return OpenFlow error code. 
*/ + return error; + } + ofproto_rule_init(rule); - error = 0; if (ofm->buffer_id != htonl(UINT32_MAX)) { error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id), &packet, &in_port); - } else { - packet = NULL; - in_port = UINT16_MAX; + if (!error) { + wdp_flow_inject(p->wdp, rule, in_port, packet); + ofpbuf_delete(packet); + } } - rule_insert(p, rule, packet, in_port); - ofpbuf_delete(packet); - return error; + return 0; } -static struct rule * ++static struct wdp_rule * + find_flow_strict(struct ofproto *p, const struct ofp_flow_mod *ofm) + { - uint32_t wildcards; + flow_t flow; + - flow_from_match(&ofm->match, p->tun_id_from_cookie, ofm->cookie, - &flow, &wildcards); - return rule_from_cls_rule(classifier_find_rule_exactly( - &p->cls, &flow, wildcards, - ntohs(ofm->priority))); ++ flow_from_match(&ofm->match, ntohs(ofm->priority), ++ p->tun_id_from_cookie, ofm->cookie, &flow); ++ return wdp_flow_get(p->wdp, &flow); + } + static int - modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm, - size_t n_actions, uint16_t command, struct wdp_rule *rule) + send_buffered_packet(struct ofproto *ofproto, struct ofconn *ofconn, - struct rule *rule, const struct ofp_flow_mod *ofm) ++ struct wdp_rule *rule, const struct ofp_flow_mod *ofm) { - if (rule_is_hidden(rule)) { + struct ofpbuf *packet; + uint16_t in_port; - flow_t flow; + int error; + + if (ofm->buffer_id == htonl(UINT32_MAX)) { return 0; } - if (command == OFPFC_DELETE) { - delete_flow(p, rule, OFPPR_DELETE); - } else { - const struct ofp_action_header *actions = ofm->actions; - struct wdp_flow_put put; - - ofproto_rule_cast(rule)->flow_cookie = ofm->cookie; - - put.flags = WDP_PUT_MODIFY | WDP_PUT_ACTIONS; - put.flow = &rule->cr.flow; - put.actions = (const union ofp_action *) actions; - put.n_actions = n_actions; - put.idle_timeout = put.hard_timeout = 0; - wdp_flow_put(p->wdp, &put, NULL, NULL); + error = pktbuf_retrieve(ofconn->pktbuf, ntohl(ofm->buffer_id), + &packet, &in_port); + if (error) { + 
return error; } - flow_extract(packet, 0, in_port, &flow); - rule_execute(ofproto, rule, packet, &flow); ++ wdp_flow_inject(ofproto->wdp, rule, in_port, packet); + ofpbuf_delete(packet); + return 0; } + + /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ + + struct modify_flows_cbdata { + struct ofproto *ofproto; + const struct ofp_flow_mod *ofm; + size_t n_actions; - struct rule *match; ++ struct wdp_rule *match; + }; + + static int modify_flow(struct ofproto *, const struct ofp_flow_mod *, - size_t n_actions, struct rule *); -static void modify_flows_cb(struct cls_rule *, void *cbdata_); ++ size_t n_actions, struct wdp_rule *); ++static void modify_flows_cb(struct wdp_rule *, void *cbdata_); + /* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code as + * encoded by ofp_mkerr() on failure. + * + * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, + * if any. */ static int - modify_flows_strict(struct ofproto *p, const struct ofp_flow_mod *ofm, - size_t n_actions, uint16_t command) + modify_flows_loose(struct ofproto *p, struct ofconn *ofconn, + const struct ofp_flow_mod *ofm, size_t n_actions) { - struct wdp_rule *rule; - flow_t flow; + struct modify_flows_cbdata cbdata; - struct cls_rule target; ++ flow_t target; - flow_from_match(&flow, ntohs(ofm->priority), &ofm->match); - rule = wdp_flow_get(p->wdp, &flow); + cbdata.ofproto = p; + cbdata.ofm = ofm; + cbdata.n_actions = n_actions; + cbdata.match = NULL; - if (rule) { - if (command == OFPFC_DELETE - && ofm->out_port != htons(OFPP_NONE) - && !rule_has_out_port(rule, ofm->out_port)) { - return 0; - } - cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, - &target); ++ flow_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, ++ &target); - modify_flow(p, ofm, n_actions, command, rule); - classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, - modify_flows_cb, &cbdata); ++ wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL, ++ 
modify_flows_cb, &cbdata); + if (cbdata.match) { + /* This credits the packet to whichever flow happened to happened to + * match last. That's weird. Maybe we should do a lookup for the + * flow that actually matches the packet? Who knows. */ + send_buffered_packet(p, ofconn, cbdata.match, ofm); + return 0; + } else { + return add_flow(p, ofconn, ofm, n_actions); } - return 0; } - struct modify_flows_cbdata { - struct ofproto *ofproto; - const struct ofp_flow_mod *ofm; - uint16_t out_port; - size_t n_actions; - uint16_t command; - }; + /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error + * code as encoded by ofp_mkerr() on failure. + * + * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, + * if any. */ + static int + modify_flow_strict(struct ofproto *p, struct ofconn *ofconn, + struct ofp_flow_mod *ofm, size_t n_actions) + { - struct rule *rule = find_flow_strict(p, ofm); ++ struct wdp_rule *rule = find_flow_strict(p, ofm); + if (rule && !rule_is_hidden(rule)) { + modify_flow(p, ofm, n_actions, rule); + return send_buffered_packet(p, ofconn, rule, ofm); + } else { + return add_flow(p, ofconn, ofm, n_actions); + } + } + /* Callback for modify_flows_loose(). 
*/ static void -modify_flows_cb(struct cls_rule *rule_, void *cbdata_) +modify_flows_cb(struct wdp_rule *rule, void *cbdata_) { - struct rule *rule = rule_from_cls_rule(rule_); struct modify_flows_cbdata *cbdata = cbdata_; - if (cbdata->out_port != htons(OFPP_NONE) - && !rule_has_out_port(rule, cbdata->out_port)) { - return; + if (!rule_is_hidden(rule)) { + cbdata->match = rule; + modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions, rule); } - - modify_flow(cbdata->ofproto, cbdata->ofm, cbdata->n_actions, - cbdata->command, rule); } + /* Implements core of OFPFC_MODIFY and OFPFC_MODIFY_STRICT where 'rule' has + * been identified as a flow in 'p''s flow table to be modified, by changing + * the rule's actions to match those in 'ofm' (which is followed by 'n_actions' + * ofp_action[] structures). */ static int - modify_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm, - size_t n_actions, uint16_t command) + modify_flow(struct ofproto *p, const struct ofp_flow_mod *ofm, - size_t n_actions, struct rule *rule) ++ size_t n_actions, struct wdp_rule *rule) { - struct modify_flows_cbdata cbdata; - size_t actions_len = n_actions * sizeof *rule->actions; ++ const struct ofp_action_header *actions = ofm->actions; ++ struct ofproto_rule *ofproto_rule = ofproto_rule_cast(rule); ++ struct wdp_flow_put put; + - rule->flow_cookie = ofm->cookie; ++ ofproto_rule->flow_cookie = ofm->cookie; + + /* If the actions are the same, do nothing. */ + if (n_actions == rule->n_actions - && !memcmp(ofm->actions, rule->actions, actions_len)) ++ && !memcmp(ofm->actions, rule->actions, sizeof *actions * n_actions)) + { + return 0; + } + - /* Replace actions. */ - free(rule->actions); - rule->actions = xmemdup(ofm->actions, actions_len); - rule->n_actions = n_actions; - - /* Make sure that the datapath gets updated properly. 
*/ - if (rule->cr.wc.wildcards) { - COVERAGE_INC(ofproto_mod_wc_flow); - p->need_revalidate = true; - } else { - rule_update_actions(p, rule); - } - - return 0; ++ put.flags = WDP_PUT_MODIFY | WDP_PUT_ACTIONS; ++ put.flow = &rule->cr.flow; ++ put.actions = (const union ofp_action *) actions; ++ put.n_actions = n_actions; ++ put.idle_timeout = put.hard_timeout = 0; ++ return wdp_flow_put(p->wdp, &put, NULL, NULL); + } + + /* OFPFC_DELETE implementation. */ + + struct delete_flows_cbdata { + struct ofproto *ofproto; + uint16_t out_port; + }; + -static void delete_flows_cb(struct cls_rule *, void *cbdata_); -static void delete_flow(struct ofproto *, struct rule *, uint16_t out_port); ++static void delete_flows_cb(struct wdp_rule *, void *cbdata_); ++static void delete_flow_core(struct ofproto *, struct wdp_rule *, ++ uint16_t out_port); + + /* Implements OFPFC_DELETE. */ + static void + delete_flows_loose(struct ofproto *p, const struct ofp_flow_mod *ofm) + { + struct delete_flows_cbdata cbdata; - struct cls_rule target; + flow_t target; cbdata.ofproto = p; - cbdata.ofm = ofm; - cbdata.out_port = (command == OFPFC_DELETE ? ofm->out_port - : htons(OFPP_NONE)); - cbdata.n_actions = n_actions; - cbdata.command = command; + cbdata.out_port = ofm->out_port; + - cls_rule_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, - &target); ++ flow_from_match(&ofm->match, 0, p->tun_id_from_cookie, ofm->cookie, ++ &target); - flow_from_match(&target, 0, &ofm->match); - classifier_for_each_match(&p->cls, &target, CLS_INC_ALL, - delete_flows_cb, &cbdata); + wdp_flow_for_each_match(p->wdp, &target, CLS_INC_ALL, - modify_flows_cb, &cbdata); - return 0; ++ delete_flows_cb, &cbdata); } + /* Implements OFPFC_DELETE_STRICT. 
*/ + static void + delete_flow_strict(struct ofproto *p, struct ofp_flow_mod *ofm) + { - struct rule *rule = find_flow_strict(p, ofm); ++ struct wdp_rule *rule = find_flow_strict(p, ofm); + if (rule) { - delete_flow(p, rule, ofm->out_port); ++ delete_flow_core(p, rule, ofm->out_port); + } + } + + /* Callback for delete_flows_loose(). */ + static void -delete_flows_cb(struct cls_rule *rule_, void *cbdata_) ++delete_flows_cb(struct wdp_rule *rule, void *cbdata_) + { - struct rule *rule = rule_from_cls_rule(rule_); + struct delete_flows_cbdata *cbdata = cbdata_; + - delete_flow(cbdata->ofproto, rule, cbdata->out_port); ++ delete_flow_core(cbdata->ofproto, rule, cbdata->out_port); + } + + /* Implements core of OFPFC_DELETE and OFPFC_DELETE_STRICT where 'rule' has + * been identified as a flow to delete from 'p''s flow table, by deleting the + * flow and sending out a OFPT_FLOW_REMOVED message to any interested + * controller. + * + * Will not delete 'rule' if it is hidden. Will delete 'rule' only if + * 'out_port' is htons(OFPP_NONE) or if 'rule' actually outputs to the + * specified 'out_port'. 
*/ + static void -delete_flow(struct ofproto *p, struct rule *rule, uint16_t out_port) ++delete_flow_core(struct ofproto *p, struct wdp_rule *rule, uint16_t out_port) + { + if (rule_is_hidden(rule)) { + return; + } + + if (out_port != htons(OFPP_NONE) && !rule_has_out_port(rule, out_port)) { + return; + } + - send_flow_removed(p, rule, time_msec(), OFPRR_DELETE); - rule_remove(p, rule); ++ delete_flow(p, rule, OFPRR_DELETE); + } + static int handle_flow_mod(struct ofproto *p, struct ofconn *ofconn, struct ofp_flow_mod *ofm) @@@ -1738,19 -3495,21 +2150,21 @@@ switch (ntohs(ofm->command)) { case OFPFC_ADD: -- return add_flow(p, ofconn, ofm, n_actions); ++ return modify_flows_loose(p, ofconn, ofm, n_actions); case OFPFC_MODIFY: - return modify_flows_loose(p, ofm, n_actions, OFPFC_MODIFY); - return modify_flows_loose(p, ofconn, ofm, n_actions); ++ return modify_flow_strict(p, ofconn, ofm, n_actions); case OFPFC_MODIFY_STRICT: - return modify_flows_strict(p, ofm, n_actions, OFPFC_MODIFY); + return modify_flow_strict(p, ofconn, ofm, n_actions); case OFPFC_DELETE: - return modify_flows_loose(p, ofm, n_actions, OFPFC_DELETE); + delete_flows_loose(p, ofm); + return 0; case OFPFC_DELETE_STRICT: - return modify_flows_strict(p, ofm, n_actions, OFPFC_DELETE); + delete_flow_strict(p, ofm); + return 0; default: return ofp_mkerr(OFPET_FLOW_MOD_FAILED, OFPFMFC_BAD_COMMAND); @@@ -1865,23 -3703,37 +2358,23 @@@ handle_openflow(struct ofconn *ofconn, } static void -handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet) +handle_flow_miss(struct ofproto *p, struct wdp_packet *packet) { - struct odp_msg *msg = packet->data; - struct rule *rule; - struct ofpbuf payload; + struct wdp_rule *rule; flow_t flow; - flow_extract(packet->payload, packet->in_port, &flow); - payload.data = msg + 1; - payload.size = msg->length - sizeof *msg; - flow_extract(&payload, msg->arg, msg->port, &flow); - - /* Check with in-band control to see if this packet should be sent - * to the local port 
regardless of the flow table. */ - if (in_band_msg_in_hook(p->in_band, &flow, &payload)) { - union odp_action action; - - memset(&action, 0, sizeof(action)); - action.output.type = ODPAT_OUTPUT; - action.output.port = ODPP_LOCAL; - dpif_execute(p->dpif, flow.in_port, &action, 1, &payload); - } - - rule = lookup_valid_rule(p, &flow); ++ flow_extract(packet->payload, packet->tun_id, packet->in_port, &flow); + rule = wdp_flow_match(p->wdp, &flow); if (!rule) { /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */ - struct ofport *port = port_array_get(&p->ports, msg->port); - if (port) { - if (port->opp.config & OFPPC_NO_PACKET_IN) { + struct wdp_port port; + + if (!wdp_port_query_by_number(p->wdp, packet->in_port, &port)) { + bool no_packet_in = (port.opp.config & OFPPC_NO_PACKET_IN) != 0; + wdp_port_free(&port); + if (no_packet_in) { COVERAGE_INC(ofproto_no_packet_in); - /* XXX install 'drop' flow entry */ - ofpbuf_delete(packet); + wdp_packet_destroy(packet); return; } } else { @@@ -1895,10 -3745,24 +2387,9 @@@ return; } - if (rule->cr.wc.wildcards) { - rule = rule_create_subrule(p, rule, &flow); - rule_make_actions(p, rule, packet); - } else { - if (!rule->may_install) { - /* The rule is not installable, that is, we need to process every - * packet, so process the current packet and set its actions into - * 'subrule'. */ - rule_make_actions(p, rule, packet); - } else { - /* XXX revalidate rule if it needs it */ - } - } - - rule_execute(p, rule, &payload, &flow); - rule_reinstall(p, rule); + wdp_flow_inject(p->wdp, rule, packet->in_port, packet->payload); - if (rule->cr.flow.priority == FAIL_OPEN_PRIORITY - && rconn_is_connected(p->controller->rconn)) { - if (rule->super && rule->super->cr.priority == FAIL_OPEN_PRIORITY) { ++ if (rule->cr.flow.priority == FAIL_OPEN_PRIORITY) { /* * Extra-special case for fail-open mode. * @@@ -1909,54 -3773,97 +2400,53 @@@ * * See the top-level comment in fail-open.c for more information. 
*/ - pinsched_send(p->miss_sched, packet->in_port, packet, - send_packet_in_miss, p); + send_packet_in(p, packet); } else { - ofpbuf_delete(packet); + wdp_packet_destroy(packet); } } static void -handle_odp_msg(struct ofproto *p, struct ofpbuf *packet) +handle_wdp_packet(struct ofproto *p, struct wdp_packet *packet) { - struct odp_msg *msg = packet->data; - - switch (msg->type) { - case _ODPL_ACTION_NR: + switch (packet->channel) { + case WDP_CHAN_ACTION: COVERAGE_INC(ofproto_ctlr_action); - pinsched_send(p->action_sched, packet->in_port, packet, - send_packet_in_action, p); + send_packet_in(p, packet); break; - case _ODPL_SFLOW_NR: - if (p->sflow) { - ofproto_sflow_received(p->sflow, msg); - } - ofpbuf_delete(packet); + case WDP_CHAN_SFLOW: + /* XXX */ + wdp_packet_destroy(packet); break; - case _ODPL_MISS_NR: - handle_odp_miss_msg(p, packet); + case WDP_CHAN_MISS: + handle_flow_miss(p, packet); break; + case WDP_N_CHANS: default: - VLOG_WARN_RL(&rl, "received ODP message of unexpected type %"PRIu32, - msg->type); + wdp_packet_destroy(packet); + VLOG_WARN_RL(&rl, "received message on unexpected channel %d", + (int) packet->channel); break; } } -static void -revalidate_cb(struct cls_rule *sub_, void *cbdata_) -{ - struct rule *sub = rule_from_cls_rule(sub_); - struct revalidate_cbdata *cbdata = cbdata_; - - if (cbdata->revalidate_all - || (cbdata->revalidate_subrules && sub->super) - || (tag_set_intersects(&cbdata->revalidate_set, sub->tags))) { - revalidate_rule(cbdata->ofproto, sub); - } -} - -static bool -revalidate_rule(struct ofproto *p, struct rule *rule) -{ - const flow_t *flow = &rule->cr.flow; - - COVERAGE_INC(ofproto_revalidate_rule); - if (rule->super) { - struct rule *super; - super = rule_from_cls_rule(classifier_lookup_wild(&p->cls, flow)); - if (!super) { - rule_remove(p, rule); - return false; - } else if (super != rule->super) { - COVERAGE_INC(ofproto_revalidate_moved); - list_remove(&rule->list); - list_push_back(&super->list, &rule->list); - 
rule->super = super; - rule->hard_timeout = super->hard_timeout; - rule->idle_timeout = super->idle_timeout; - rule->created = super->created; - rule->used = 0; - } - } - - rule_update_actions(p, rule); - return true; -} - static struct ofpbuf * - compose_flow_removed(const struct wdp_rule *rule, uint8_t reason) -compose_flow_removed(struct ofproto *p, const struct rule *rule, - long long int now, uint8_t reason) ++compose_flow_removed(struct ofproto *p, const struct wdp_rule *rule, ++ uint8_t reason) { - struct ofp_flow_removed *ofr; - struct ofpbuf *buf; - long long int tdiff = now - rule->created; + long long int tdiff = time_msec() - rule->created; uint32_t sec = tdiff / 1000; uint32_t msec = tdiff - (sec * 1000); + struct ofp_flow_removed *ofr; + struct ofpbuf *buf; ofr = make_openflow(sizeof *ofr, OFPT_FLOW_REMOVED, &buf); - flow_to_match(&rule->cr.flow, &ofr->match); - flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, p->tun_id_from_cookie, - &ofr->match); - ofr->cookie = rule->flow_cookie; - ofr->priority = htons(rule->cr.priority); ++ flow_to_match(&rule->cr.flow, p->tun_id_from_cookie, &ofr->match); + ofr->cookie = ofproto_rule_cast(rule)->flow_cookie; + ofr->priority = htons(rule->cr.flow.priority); ofr->reason = reason; ofr->duration_sec = htonl(sec); ofr->duration_nsec = htonl(msec * 1000000); @@@ -1974,95 -3901,283 +2464,182 @@@ delete_flow(struct ofproto *p, struct w * being added (and expiring). (It also prevents processing OpenFlow * requests that would not add new flows, so it is imperfect.) 
*/ - prev = NULL; - LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { - if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn) - && ofconn_receives_async_msgs(ofconn)) { - if (prev) { - queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter); - } else { - buf = compose_flow_removed(p, rule, now, reason); - } - prev = ofconn; - } - } - if (prev) { - queue_tx(buf, prev, prev->reply_counter); - } -} - + struct ofproto_rule *ofproto_rule = ofproto_rule_cast(rule); + struct wdp_flow_stats stats; + struct ofpbuf *buf; -static void -expire_rule(struct cls_rule *cls_rule, void *p_) -{ - struct ofproto *p = p_; - struct rule *rule = rule_from_cls_rule(cls_rule); - long long int hard_expire, idle_expire, expire, now; - - hard_expire = (rule->hard_timeout - ? rule->created + rule->hard_timeout * 1000 - : LLONG_MAX); - idle_expire = (rule->idle_timeout - && (rule->super || list_is_empty(&rule->list)) - ? rule->used + rule->idle_timeout * 1000 - : LLONG_MAX); - expire = MIN(hard_expire, idle_expire); - - now = time_msec(); - if (now < expire) { - if (rule->installed && now >= rule->used + 5000) { - uninstall_idle_flow(p, rule); - } else if (!rule->cr.wc.wildcards) { - active_timeout(p, rule); - } + if (ofproto_rule->send_flow_removed) { + /* Compose most of the ofp_flow_removed before 'rule' is destroyed. */ - buf = compose_flow_removed(rule, reason); ++ buf = compose_flow_removed(p, rule, reason); + } else { + buf = NULL; + } + if (wdp_flow_delete(p->wdp, rule, &stats)) { return; } - COVERAGE_INC(ofproto_expired); - - /* Update stats. This code will be a no-op if the rule expired - * due to an idle timeout. 
*/ - if (rule->cr.wc.wildcards) { - struct rule *subrule, *next; - LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) { - rule_remove(p, subrule); - } - } else { - rule_uninstall(p, rule); - } + if (buf) { + struct ofp_flow_removed *ofr; + struct ofconn *prev = NULL; + struct ofconn *ofconn; - if (!rule_is_hidden(rule)) { - send_flow_removed(p, rule, now, - (now >= hard_expire - ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT)); - } - rule_remove(p, rule); -} + /* Compose the parts of the ofp_flow_removed that require stats. */ + ofr = buf->data; + ofr->packet_count = htonll(stats.n_packets); + ofr->byte_count = htonll(stats.n_bytes); -static void -active_timeout(struct ofproto *ofproto, struct rule *rule) -{ - if (ofproto->netflow && !is_controller_rule(rule) && - netflow_active_timeout_expired(ofproto->netflow, &rule->nf_flow)) { - struct ofexpired expired; - struct odp_flow odp_flow; - - /* Get updated flow stats. */ - memset(&odp_flow, 0, sizeof odp_flow); - if (rule->installed) { - odp_flow.key = rule->cr.flow; - odp_flow.flags = ODPFF_ZERO_TCP_FLAGS; - dpif_flow_get(ofproto->dpif, &odp_flow); - - if (odp_flow.stats.n_packets) { - update_time(ofproto, rule, &odp_flow.stats); - netflow_flow_update_flags(&rule->nf_flow, odp_flow.stats.ip_tos, - odp_flow.stats.tcp_flags); + LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { + if (rconn_is_connected(ofconn->rconn)) { + if (prev) { + queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter); + } + prev = ofconn; } } - - expired.flow = rule->cr.flow; - expired.packet_count = rule->packet_count + - odp_flow.stats.n_packets; - expired.byte_count = rule->byte_count + odp_flow.stats.n_bytes; - expired.used = rule->used; - - netflow_expire(ofproto->netflow, &rule->nf_flow, &expired); - - /* Schedule us to send the accumulated records once we have - * collected all of them. 
*/ - poll_immediate_wake(); - } -} - -static void -update_used(struct ofproto *p) -{ - struct odp_flow *flows; - size_t n_flows; - size_t i; - int error; - - error = dpif_flow_list_all(p->dpif, &flows, &n_flows); - if (error) { - return; - } - - for (i = 0; i < n_flows; i++) { - struct odp_flow *f = &flows[i]; - struct rule *rule; - - rule = rule_from_cls_rule( - classifier_find_rule_exactly(&p->cls, &f->key, 0, UINT16_MAX)); - if (!rule || !rule->installed) { - COVERAGE_INC(ofproto_unexpected_rule); - dpif_flow_del(p->dpif, f); - continue; + if (prev) { + queue_tx(buf, prev, prev->reply_counter); + } else { + ofpbuf_delete(buf); } - - update_time(p, rule, &f->stats); - rule_account(p, rule, f->stats.n_bytes); } - free(flows); + free(ofproto_rule); } + /* pinsched callback for sending 'packet' on 'ofconn'. */ static void - do_send_packet_in(struct ofconn *ofconn, uint32_t buffer_id, - const struct wdp_packet *packet, int send_len) -do_send_packet_in(struct ofpbuf *packet, void *ofconn_) ++do_send_packet_in(struct wdp_packet *packet, void *ofconn_) { - struct ofpbuf *opi; - uint8_t reason; + struct ofconn *ofconn = ofconn_; - reason = packet->channel == WDP_CHAN_ACTION ? OFPR_ACTION : OFPR_NO_MATCH; - opi = make_packet_in(buffer_id, packet->in_port, reason, - packet->payload, send_len); - rconn_send_with_limit(ofconn->rconn, opi, ofconn->packet_in_counter, 100); - rconn_send_with_limit(ofconn->rconn, packet, ++ rconn_send_with_limit(ofconn->rconn, packet->payload, + ofconn->packet_in_counter, 100); ++ packet->payload = NULL; ++ wdp_packet_destroy(packet); } + /* Takes 'packet', which has been converted with do_convert_to_packet_in(), and + * finalizes its content for sending on 'ofconn', and passes it to 'ofconn''s + * packet scheduler for sending. + * + * 'max_len' specifies the maximum number of bytes of the packet to send on + * 'ofconn' (INT_MAX specifies no limit). + * + * If 'clone' is true, the caller retains ownership of 'packet'. 
Otherwise, + * ownership is transferred to this function. */ static void - send_packet_in_action(struct wdp_packet *packet, void *p_) - { - struct ofproto *p = p_; - struct ofconn *ofconn; -schedule_packet_in(struct ofconn *ofconn, struct ofpbuf *packet, int max_len, - bool clone) ++schedule_packet_in(struct ofconn *ofconn, struct wdp_packet *packet, ++ int max_len, bool clone) + { + struct ofproto *ofproto = ofconn->ofproto; - struct ofp_packet_in *opi = packet->data; - uint16_t in_port = ofp_port_to_odp_port(ntohs(opi->in_port)); ++ struct ofp_packet_in *opi = packet->payload->data; + int send_len, trim_size; + uint32_t buffer_id; + + /* Get buffer. */ + if (opi->reason == OFPR_ACTION) { + buffer_id = UINT32_MAX; + } else if (ofproto->fail_open && fail_open_is_active(ofproto->fail_open)) { + buffer_id = pktbuf_get_null(); + } else if (!ofconn->pktbuf) { + buffer_id = UINT32_MAX; + } else { + struct ofpbuf payload; + payload.data = opi->data; - payload.size = packet->size - offsetof(struct ofp_packet_in, data); - buffer_id = pktbuf_save(ofconn->pktbuf, &payload, in_port); ++ payload.size = (packet->payload->size ++ - offsetof(struct ofp_packet_in, data)); ++ buffer_id = pktbuf_save(ofconn->pktbuf, &payload, packet->in_port); + } - LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { - if (ofconn == p->controller || ofconn->miss_send_len) { - do_send_packet_in(ofconn, UINT32_MAX, packet, packet->send_len); - } + /* Figure out how much of the packet to send. */ + send_len = ntohs(opi->total_len); + if (buffer_id != UINT32_MAX) { + send_len = MIN(send_len, ofconn->miss_send_len); } - wdp_packet_destroy(packet); + send_len = MIN(send_len, max_len); + + /* Adjust packet length and clone if necessary. 
*/ + trim_size = offsetof(struct ofp_packet_in, data) + send_len; + if (clone) { - packet = ofpbuf_clone_data(packet->data, trim_size); - opi = packet->data; ++ packet = wdp_packet_clone(packet, trim_size); ++ opi = packet->payload->data; + } else { - packet->size = trim_size; ++ packet->payload->size = trim_size; + } + + /* Update packet headers. */ + opi->buffer_id = htonl(buffer_id); - update_openflow_length(packet); ++ update_openflow_length(packet->payload); + + /* Hand over to packet scheduler. It might immediately call into + * do_send_packet_in() or it might buffer it for a while (until a later + * call to pinsched_run()). */ - pinsched_send(ofconn->schedulers[opi->reason], in_port, ++ pinsched_send(ofconn->schedulers[opi->reason], packet->in_port, + packet, do_send_packet_in, ofconn); } -/* Replace struct odp_msg header in 'packet' by equivalent struct - * ofp_packet_in. The odp_msg must have sufficient headroom to do so (e.g. as - * returned by dpif_recv()). ++/* Converts 'packet->payload' to a struct ofp_packet_in. It must have ++ * sufficient headroom to do so (e.g. as returned by dpif_recv()). + * + * The conversion is not complete: the caller still needs to trim any unneeded + * payload off the end of the buffer, set the length in the OpenFlow header, + * and set buffer_id. Those require us to know the controller settings and so + * must be done on a per-controller basis. + * + * Returns the maximum number of bytes of the packet that should be sent to + * the controller (INT_MAX if no limit). 
*/ + static int -do_convert_to_packet_in(struct ofpbuf *packet) ++do_convert_to_packet_in(struct wdp_packet *packet) + { - struct odp_msg *msg = packet->data; ++ uint16_t total_len = packet->payload->size; + struct ofp_packet_in *opi; - uint8_t reason; - uint16_t total_len; - uint16_t in_port; - int max_len; - - /* Extract relevant header fields */ - if (msg->type == _ODPL_ACTION_NR) { - reason = OFPR_ACTION; - max_len = msg->arg; - } else { - reason = OFPR_NO_MATCH; - max_len = INT_MAX; - } - total_len = msg->length - sizeof *msg; - in_port = odp_port_to_ofp_port(msg->port); + + /* Repurpose packet buffer by overwriting header. */ - ofpbuf_pull(packet, sizeof(struct odp_msg)); - opi = ofpbuf_push_zeros(packet, offsetof(struct ofp_packet_in, data)); ++ opi = ofpbuf_push_zeros(packet->payload, ++ offsetof(struct ofp_packet_in, data)); + opi->header.version = OFP_VERSION; + opi->header.type = OFPT_PACKET_IN; + opi->total_len = htons(total_len); - opi->in_port = htons(in_port); - opi->reason = reason; - - return max_len; ++ opi->in_port = htons(packet->in_port); ++ if (packet->channel == WDP_CHAN_MISS) { ++ opi->reason = OFPR_NO_MATCH; ++ return INT_MAX; ++ } else { ++ opi->reason = OFPR_ACTION; ++ return packet->send_len; ++ } + } + -/* Given 'packet' containing an odp_msg of type _ODPL_ACTION_NR or - * _ODPL_MISS_NR, sends an OFPT_PACKET_IN message to each OpenFlow controller - * as necessary according to their individual configurations. ++/* Given 'packet' with channel WDP_CHAN_ACTION or WDP_CHAN_MISS, sends an ++ * OFPT_PACKET_IN message to each OpenFlow controller as necessary according to ++ * their individual configurations. + * - * 'packet' must have sufficient headroom to convert it into a struct ++ * 'packet->payload' must have sufficient headroom to convert it into a struct + * ofp_packet_in (e.g. as returned by dpif_recv()). + * + * Takes ownership of 'packet'. 
*/ static void - send_packet_in_miss(struct wdp_packet *packet, void *p_) -send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet) ++send_packet_in(struct ofproto *ofproto, struct wdp_packet *packet) { - struct ofproto *p = p_; - bool in_fail_open = p->fail_open && fail_open_is_active(p->fail_open); - struct ofconn *ofconn; + struct ofconn *ofconn, *prev; + int max_len; - LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) { - if (ofconn->miss_send_len) { - struct pktbuf *pb = ofconn->pktbuf; - uint32_t buffer_id = (in_fail_open - ? pktbuf_get_null() - : pktbuf_save(pb, packet->payload, - packet->in_port)); - int send_len = (buffer_id != UINT32_MAX ? ofconn->miss_send_len - : UINT32_MAX); - do_send_packet_in(ofconn, buffer_id, packet, send_len); + max_len = do_convert_to_packet_in(packet); + + prev = NULL; + LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) { + if (ofconn_receives_async_msgs(ofconn)) { + if (prev) { + schedule_packet_in(prev, packet, max_len, true); + } + prev = ofconn; } } - wdp_packet_destroy(packet); + if (prev) { + schedule_packet_in(prev, packet, max_len, false); + } else { - ofpbuf_delete(packet); ++ wdp_packet_destroy(packet); + } } static uint64_t diff --cc ofproto/pinsched.h index 2ec33bc7a,17e3db1d1..0bbdbe079 --- a/ofproto/pinsched.h +++ b/ofproto/pinsched.h @@@ -21,14 -21,15 +21,16 @@@ struct ofpbuf; struct switch_status; +struct wdp_packet; -typedef void pinsched_tx_cb(struct ofpbuf *, void *aux); +typedef void pinsched_tx_cb(struct wdp_packet *, void *aux); struct pinsched *pinsched_create(int rate_limit, int burst_limit, struct switch_status *); + void pinsched_get_limits(const struct pinsched *, + int *rate_limit, int *burst_limit); void pinsched_set_limits(struct pinsched *, int rate_limit, int burst_limit); void pinsched_destroy(struct pinsched *); -void pinsched_send(struct pinsched *, uint16_t port_no, struct ofpbuf *, +void pinsched_send(struct pinsched *, uint16_t port_no, struct wdp_packet *, 
pinsched_tx_cb *, void *aux); void pinsched_run(struct pinsched *, pinsched_tx_cb *, void *aux); void pinsched_wait(struct pinsched *); diff --cc ofproto/wdp-xflow.c index 4a7154b5a,000000000..3e4edeb46 mode 100644,000000..100644 --- a/ofproto/wdp-xflow.c +++ b/ofproto/wdp-xflow.c @@@ -1,2256 -1,0 +1,2285 @@@ +/* + * Copyright (c) 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "wdp-xflow.h" + +#include +#include + +#include "coverage.h" +#include "dhcp.h" +#include "netdev.h" +#include "netflow.h" +#include "ofpbuf.h" +#include "openflow/nicira-ext.h" +#include "openflow/openflow.h" +#include "packets.h" +#include "poll-loop.h" +#include "port-array.h" +#include "shash.h" +#include "stp.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "vconn.h" +#include "wdp-provider.h" +#include "xfif.h" +#include "xflow-util.h" +#include "xtoxll.h" + +#define THIS_MODULE VLM_wdp_xflow +#include "vlog.h" + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +/* Maximum numbers of rules. */ +#define WX_MAX_WILD 65536 /* Wildcarded rules. */ +#define WX_MAX_EXACT 1048576 /* Exact-match rules. */ + +struct wx { + struct list list_node; + struct wdp wdp; + struct xfif *xfif; + struct classifier cls; + struct netdev_monitor *netdev_monitor; + struct port_array ports; /* Index is ODP port nr; wdp_port->opp.port_no + * is OFP port nr. 
*/ + struct shash port_by_name; + bool need_revalidate; + long long int next_expiration; +}; + +static struct list all_wx = LIST_INITIALIZER(&all_wx); + +static int wx_port_init(struct wx *); +static void wx_port_run(struct wx *); +static void wx_port_refresh_groups(struct wx *); + +enum { + WX_GROUP_FLOOD = 0, + WX_GROUP_ALL = 1 +}; + +static struct wx * +wx_cast(const struct wdp *wdp) +{ + return CONTAINER_OF(wdp, struct wx, wdp); +} + +static int +wx_xlate_actions(struct wx *, const union ofp_action *, size_t n, + const flow_t *flow, const struct ofpbuf *packet, + struct xflow_actions *out, bool *may_set_up_flow); + +struct wx_rule { + struct wdp_rule wr; + + uint64_t packet_count; /* Number of packets received. */ + uint64_t byte_count; /* Number of bytes received. */ + uint64_t accounted_bytes; /* Number of bytes passed to account_cb. */ + long long int used; /* Last-used time (0 if never used). */ + + /* If 'super' is non-NULL, this rule is a subrule, that is, it is an + * exact-match rule (having cr.wc.wildcards of 0) generated from the + * wildcard rule 'super'. In this case, 'list' is an element of the + * super-rule's list. + * + * If 'super' is NULL, this rule is a super-rule, and 'list' is the head of + * a list of subrules. A super-rule with no wildcards (where + * cr.wc.wildcards is 0) will never have any subrules. */ + struct wx_rule *super; + struct list list; + + /* Datapath actions. + * + * A super-rule with wildcard fields never has XFLOW actions (since the + * datapath only supports exact-match flows). */ + bool installed; /* Installed in datapath? */ + bool may_install; /* True ordinarily; false if actions must + * be reassessed for every packet. 
*/ + int n_xflow_actions; + union xflow_action *xflow_actions; +}; + +static void wx_rule_destroy(struct wx *, struct wx_rule *); +static void wx_rule_update_actions(struct wx *, struct wx_rule *); +static void wx_rule_execute(struct wx *, struct wx_rule *, + struct ofpbuf *packet, const flow_t *); +static bool wx_rule_make_actions(struct wx *, struct wx_rule *, + const struct ofpbuf *packet); +static void wx_rule_install(struct wx *, struct wx_rule *, + struct wx_rule *displaced_rule); + +static struct wx_rule * +wx_rule_cast(const struct cls_rule *cls_rule) +{ + return cls_rule ? CONTAINER_OF(cls_rule, struct wx_rule, wr.cr) : NULL; +} + +/* Returns true if 'rule' is merely an implementation detail that should be + * hidden from the client. */ +static inline bool +wx_rule_is_hidden(const struct wx_rule *rule) +{ + return rule->super != NULL; +} + +static void +wx_rule_free(struct wx_rule *rule) +{ + wdp_rule_uninit(&rule->wr); + free(rule->xflow_actions); + free(rule); +} + +static void +wx_rule_account(struct wx *wx OVS_UNUSED, struct wx_rule *rule OVS_UNUSED, + uint64_t extra_bytes OVS_UNUSED) +{ + /* XXX call account_cb hook */ +} + +static void +wx_rule_post_uninstall(struct wx *wx, struct wx_rule *rule) +{ + struct wx_rule *super = rule->super; + + wx_rule_account(wx, rule, 0); + + /* XXX netflow expiration */ + + if (super) { + super->packet_count += rule->packet_count; + super->byte_count += rule->byte_count; + + /* Reset counters to prevent double counting if the rule ever gets + * reinstalled. */ + rule->packet_count = 0; + rule->byte_count = 0; + rule->accounted_bytes = 0; + + //XXX netflow_flow_clear(&rule->nf_flow); + } +} + +static long long int +xflow_flow_stats_to_msec(const struct xflow_flow_stats *stats) +{ + return (stats->used_sec + ? 
stats->used_sec * 1000 + stats->used_nsec / 1000000 + : 0); +} + +static void +wx_rule_update_time(struct wx *wx OVS_UNUSED, struct wx_rule *rule, + const struct xflow_flow_stats *stats) +{ + long long int used = xflow_flow_stats_to_msec(stats); + if (used > rule->used) { + rule->used = used; + if (rule->super && used > rule->super->used) { + rule->super->used = used; + } + //XXX netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used); + } +} + +static void +wx_rule_update_stats(struct wx *wx, struct wx_rule *rule, + const struct xflow_flow_stats *stats) +{ + if (stats->n_packets) { + wx_rule_update_time(wx, rule, stats); + rule->packet_count += stats->n_packets; + rule->byte_count += stats->n_bytes; + /* XXX netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos, + stats->tcp_flags); */ + } +} + +static void +wx_rule_uninstall(struct wx *wx, struct wx_rule *rule) +{ + assert(!rule->wr.cr.flow.wildcards); + if (rule->installed) { + struct xflow_flow xflow_flow; + + xflow_key_from_flow(&xflow_flow.key, &rule->wr.cr.flow); + xflow_flow.actions = NULL; + xflow_flow.n_actions = 0; + xflow_flow.flags = 0; + if (!xfif_flow_del(wx->xfif, &xflow_flow)) { + wx_rule_update_stats(wx, rule, &xflow_flow.stats); + } + rule->installed = false; + + wx_rule_post_uninstall(wx, rule); + } +} + +#if 0 +static bool +is_controller_rule(struct wx_rule *rule) +{ + /* If the only action is send to the controller then don't report + * NetFlow expiration messages since it is just part of the control + * logic for the network and not real traffic. 
*/ + + if (rule && rule->super) { + struct wdp_rule *super = &rule->super->wr; + + return super->n_actions == 1 && + super->actions[0].type == htons(OFPAT_OUTPUT) && + super->actions[0].output.port == htons(OFPP_CONTROLLER); + } + + return false; +} +#endif + +static void +wx_rule_remove(struct wx *wx, struct wx_rule *rule) +{ + if (rule->wr.cr.flow.wildcards) { + COVERAGE_INC(wx_del_wc_flow); + wx->need_revalidate = true; + } else { + wx_rule_uninstall(wx, rule); + } + classifier_remove(&wx->cls, &rule->wr.cr); + wx_rule_destroy(wx, rule); +} + +static bool +wx_rule_revalidate(struct wx *wx, struct wx_rule *rule) +{ + const flow_t *flow = &rule->wr.cr.flow; + + COVERAGE_INC(wx_rule_revalidate); + if (rule->super) { + struct wx_rule *super; + super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow)); + if (!super) { + wx_rule_remove(wx, rule); + return false; + } else if (super != rule->super) { + COVERAGE_INC(wx_revalidate_moved); + list_remove(&rule->list); + list_push_back(&super->list, &rule->list); + rule->super = super; + rule->wr.hard_timeout = super->wr.hard_timeout; + rule->wr.idle_timeout = super->wr.idle_timeout; + rule->wr.created = super->wr.created; + rule->used = 0; + } + } + + wx_rule_update_actions(wx, rule); + return true; +} + +/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its + * super-rule's list of subrules. If 'rule' is a super-rule, also iterates + * through all of its subrules and revalidates them, destroying any that no + * longer has a super-rule (which is probably all of them). + * + * Before calling this function, the caller must have removed 'rule' from + * the classifier. If 'rule' is an exact-match rule, the caller is also + * responsible for ensuring that it has been uninstalled from the datapath.
*/ +static void +wx_rule_destroy(struct wx *wx, struct wx_rule *rule) +{ + if (!rule->super) { + struct wx_rule *subrule, *next; + LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) { + wx_rule_revalidate(wx, subrule); + } + } else { + list_remove(&rule->list); + } + wx_rule_free(rule); +} + +#if 0 +static bool +wx_rule_has_out_port(const struct wx_rule *rule, uint16_t out_port) +{ + const union ofp_action *oa; + struct actions_iterator i; + + if (out_port == htons(OFPP_NONE)) { + return true; + } + for (oa = actions_first(&i, rule->wr.actions, + rule->wr.n_actions); + oa; + oa = actions_next(&i)) { + if (oa->type == htons(OFPAT_OUTPUT) && oa->output.port == out_port) { + return true; + } + } + return false; +} +#endif + +/* Caller is responsible for initializing the 'cr' member of the returned + * rule. */ +static struct wx_rule * +wx_rule_create(struct wx_rule *super, + const union ofp_action *actions, size_t n_actions, + uint16_t idle_timeout, uint16_t hard_timeout) +{ + struct wx_rule *rule = xzalloc(sizeof *rule); + wdp_rule_init(&rule->wr, actions, n_actions); + rule->wr.idle_timeout = idle_timeout; + rule->wr.hard_timeout = hard_timeout; + rule->used = rule->wr.created; + rule->super = super; + if (super) { + list_push_back(&super->list, &rule->list); + } else { + list_init(&rule->list); + } +#if 0 + netflow_flow_clear(&rule->nf_flow); + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created); +#endif + + return rule; +} + +/* Executes the actions indicated by 'rule' on 'packet', which is in flow + * 'flow' and is considered to have arrived on XFLOW port 'in_port'. + * + * The flow that 'packet' actually contains does not need to actually match + * 'rule'; the actions in 'rule' will be applied to it either way. Likewise, + * the packet and byte counters for 'rule' will be credited for the packet sent + * out whether or not the packet actually matches 'rule'. 
+ * + * If 'rule' is an exact-match rule and 'flow' actually equals the rule's flow, + * the caller must already have accurately composed XFLOW actions for it given + * 'packet' using wx_rule_make_actions(). If 'rule' is a wildcard rule, or if + * 'rule' is an exact-match rule but 'flow' is not the rule's flow, then this + * function will compose a set of XFLOW actions based on 'rule''s OpenFlow + * actions and apply them to 'packet'. */ +static void +wx_rule_execute(struct wx *wx, struct wx_rule *rule, + struct ofpbuf *packet, const flow_t *flow) +{ + const union xflow_action *actions; + size_t n_actions; + struct xflow_actions a; + + /* Grab or compose the XFLOW actions. + * + * The special case for an exact-match 'rule' where 'flow' is not the + * rule's flow is important to avoid, e.g., sending a packet out its input + * port simply because the XFLOW actions were composed for the wrong + * scenario. */ + if (rule->wr.cr.flow.wildcards + || !flow_equal(flow, &rule->wr.cr.flow)) + { + struct wx_rule *super = rule->super ? rule->super : rule; + if (wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, flow, + packet, &a, NULL)) { + return; + } + actions = a.actions; + n_actions = a.n_actions; + } else { + actions = rule->xflow_actions; + n_actions = rule->n_xflow_actions; + } + + /* Execute the XFLOW actions. */ + if (!xfif_execute(wx->xfif, flow->in_port, + actions, n_actions, packet)) { + struct xflow_flow_stats stats; + flow_extract_stats(flow, packet, &stats); + wx_rule_update_stats(wx, rule, &stats); + rule->used = time_msec(); + //XXX netflow_flow_update_time(wx->netflow, &rule->nf_flow, rule->used); + } +} + +static void +wx_rule_insert(struct wx *wx, struct wx_rule *rule, struct ofpbuf *packet, + uint16_t in_port) +{ + struct wx_rule *displaced_rule; + + /* Insert the rule in the classifier.
*/ + displaced_rule = wx_rule_cast(classifier_insert(&wx->cls, &rule->wr.cr)); + if (!rule->wr.cr.flow.wildcards) { + wx_rule_make_actions(wx, rule, packet); + } + + /* Send the packet and credit it to the rule. */ + if (packet) { + flow_t flow; - flow_extract(packet, in_port, &flow); ++ flow_extract(packet, 0, in_port, &flow); + wx_rule_execute(wx, rule, packet, &flow); + } + + /* Install the rule in the datapath only after sending the packet, to + * avoid packet reordering. */ + if (rule->wr.cr.flow.wildcards) { + COVERAGE_INC(wx_add_wc_flow); + wx->need_revalidate = true; + } else { + wx_rule_install(wx, rule, displaced_rule); + } + + /* Free the rule that was displaced, if any. */ + if (displaced_rule) { + rule->wr.client_data = displaced_rule->wr.client_data; + wx_rule_destroy(wx, displaced_rule); + } +} + +static struct wx_rule * +wx_rule_create_subrule(struct wx *wx, struct wx_rule *rule, const flow_t *flow) +{ + struct wx_rule *subrule; + + subrule = wx_rule_create(rule, NULL, 0, + rule->wr.idle_timeout, + rule->wr.hard_timeout); + COVERAGE_INC(wx_subrule_create); - cls_rule_from_flow(&subrule->wr.cr, flow); ++ cls_rule_from_flow(flow, &subrule->wr.cr); + classifier_insert_exact(&wx->cls, &subrule->wr.cr); + + return subrule; +} + +/* Returns true if the actions changed, false otherwise. */ +static bool +wx_rule_make_actions(struct wx *wx, struct wx_rule *rule, + const struct ofpbuf *packet) +{ + const struct wx_rule *super; + struct xflow_actions a; + size_t actions_len; + + assert(!rule->wr.cr.flow.wildcards); + + super = rule->super ? 
rule->super : rule; + wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, + &rule->wr.cr.flow, packet, &a, &rule->may_install); + + actions_len = a.n_actions * sizeof *a.actions; + if (rule->n_xflow_actions != a.n_actions + || memcmp(rule->xflow_actions, a.actions, actions_len)) { + COVERAGE_INC(wx_xflow_unchanged); + free(rule->xflow_actions); + rule->n_xflow_actions = a.n_actions; + rule->xflow_actions = xmemdup(a.actions, actions_len); + return true; + } else { + return false; + } +} + +static int +do_put_flow(struct wx *wx, struct wx_rule *rule, int flags, + struct xflow_flow_put *put) +{ + memset(&put->flow.stats, 0, sizeof put->flow.stats); + xflow_key_from_flow(&put->flow.key, &rule->wr.cr.flow); + put->flow.actions = rule->xflow_actions; + put->flow.n_actions = rule->n_xflow_actions; + put->flow.flags = 0; + put->flags = flags; + return xfif_flow_put(wx->xfif, put); +} + +static void +wx_rule_install(struct wx *wx, struct wx_rule *rule, struct wx_rule *displaced_rule) +{ + assert(!rule->wr.cr.flow.wildcards); + + if (rule->may_install) { + struct xflow_flow_put put; + if (!do_put_flow(wx, rule, + XFLOWPF_CREATE | XFLOWPF_MODIFY | XFLOWPF_ZERO_STATS, + &put)) { + rule->installed = true; + if (displaced_rule) { + wx_rule_update_stats(wx, displaced_rule, &put.flow.stats); + wx_rule_post_uninstall(wx, displaced_rule); + } + } + } else if (displaced_rule) { + wx_rule_uninstall(wx, displaced_rule); + } +} + +static void +wx_rule_reinstall(struct wx *wx, struct wx_rule *rule) +{ + if (rule->installed) { + struct xflow_flow_put put; + COVERAGE_INC(wx_dp_missed); + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY, &put); + } else { + wx_rule_install(wx, rule, NULL); + } +} + +static void +wx_rule_update_actions(struct wx *wx, struct wx_rule *rule) +{ + bool actions_changed; +#if 0 + uint16_t new_out_iface, old_out_iface; + + old_out_iface = rule->nf_flow.output_iface; +#endif + actions_changed = wx_rule_make_actions(wx, rule, NULL); + + if 
(rule->may_install) { + if (rule->installed) { + if (actions_changed) { + struct xflow_flow_put put; + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY + | XFLOWPF_ZERO_STATS, &put); + wx_rule_update_stats(wx, rule, &put.flow.stats); +#if 0 + /* Temporarily set the old output iface so that NetFlow + * messages have the correct output interface for the old + * stats. */ + new_out_iface = rule->nf_flow.output_iface; + rule->nf_flow.output_iface = old_out_iface; +#endif + wx_rule_post_uninstall(wx, rule); + //rule->nf_flow.output_iface = new_out_iface; + } + } else { + wx_rule_install(wx, rule, NULL); + } + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +add_output_group_action(struct xflow_actions *actions, uint16_t group, + uint16_t *nf_output_iface) +{ + xflow_actions_add(actions, XFLOWAT_OUTPUT_GROUP)->output_group.group = group; + + if (group == WX_GROUP_ALL || group == WX_GROUP_FLOOD) { + *nf_output_iface = NF_OUT_FLOOD; + } +} + +static void +add_controller_action(struct xflow_actions *actions, + const struct ofp_action_output *oao) +{ + union xflow_action *a = xflow_actions_add(actions, XFLOWAT_CONTROLLER); - a->controller.arg = oao->max_len ? ntohs(oao->max_len) : UINT32_MAX; ++ a->controller.arg = ntohs(oao->max_len); +} + +struct wx_xlate_ctx { + /* Input. */ - const flow_t *flow; /* Flow to which these actions correspond. */ ++ flow_t flow; /* Flow to which these actions correspond. */ + int recurse; /* Recursion level, via xlate_table_action. */ + struct wx *wx; + const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a + * null pointer if we are revalidating + * without a packet to refer to. */ + + /* Output. */ + struct xflow_actions *out; /* Datapath actions. */ + //tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ + bool may_set_up_flow; /* True ordinarily; false if the actions must + * be reassessed for every packet. */ + uint16_t nf_output_iface; /* Output interface index for NetFlow. 
*/ +}; + +static void do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx); + +static void +add_output_action(struct wx_xlate_ctx *ctx, uint16_t port) +{ + const struct wdp_port *wdp_port = port_array_get(&ctx->wx->ports, port); + + if (wdp_port) { + if (wdp_port->opp.config & OFPPC_NO_FWD) { + /* Forwarding disabled on port. */ + return; + } + } else { + /* + * We don't have an ofport record for this port, but it doesn't hurt to + * allow forwarding to it anyhow. Maybe such a port will appear later + * and we're pre-populating the flow table. + */ + } + + xflow_actions_add(ctx->out, XFLOWAT_OUTPUT)->output.port = port; + //ctx->nf_output_iface = port; +} + +static struct wx_rule * +wx_rule_lookup_valid(struct wx *wx, const flow_t *flow) +{ + struct wx_rule *rule = wx_rule_cast(classifier_lookup(&wx->cls, flow)); + + /* The rule we found might not be valid, since we could be in need of + * revalidation. If it is not valid, don't return it. */ + if (rule + && rule->super + && wx->need_revalidate + && !wx_rule_revalidate(wx, rule)) { + COVERAGE_INC(wx_invalidated); + return NULL; + } + + return rule; +} + +static void +xlate_table_action(struct wx_xlate_ctx *ctx, uint16_t in_port) +{ + if (!ctx->recurse) { ++ uint16_t old_in_port; + struct wx_rule *rule; - flow_t flow; + - flow = *ctx->flow; - flow.in_port = in_port; ++ /* Look up a flow with 'in_port' as the input port. Then restore the ++ * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will ++ * have surprising behavior). 
*/ ++ old_in_port = ctx->flow.in_port; ++ ctx->flow.in_port = in_port; ++ rule = wx_rule_lookup_valid(ctx->wx, &ctx->flow); ++ ctx->flow.in_port = old_in_port; + - rule = wx_rule_lookup_valid(ctx->wx, &flow); + if (rule) { + if (rule->super) { + rule = rule->super; + } + + ctx->recurse++; + do_xlate_actions(rule->wr.actions, rule->wr.n_actions, ctx); + ctx->recurse--; + } + } +} + +static void +xlate_output_action(struct wx_xlate_ctx *ctx, + const struct ofp_action_output *oao) +{ + uint16_t xflow_port; + uint16_t prev_nf_output_iface = ctx->nf_output_iface; + + ctx->nf_output_iface = NF_OUT_DROP; + + switch (ntohs(oao->port)) { + case OFPP_IN_PORT: - add_output_action(ctx, ctx->flow->in_port); ++ add_output_action(ctx, ctx->flow.in_port); + break; + case OFPP_TABLE: - xlate_table_action(ctx, ctx->flow->in_port); ++ xlate_table_action(ctx, ctx->flow.in_port); + break; + case OFPP_NORMAL: +#if 0 + if (!ctx->wx->ofhooks->normal_cb(ctx->flow, ctx->packet, + ctx->out, ctx->tags, + &ctx->nf_output_iface, + ctx->wx->aux)) { + COVERAGE_INC(wx_uninstallable); + ctx->may_set_up_flow = false; + } + break; +#else + /* fall through to flood for now */ +#endif + case OFPP_FLOOD: + add_output_group_action(ctx->out, WX_GROUP_FLOOD, + &ctx->nf_output_iface); + break; + case OFPP_ALL: + add_output_group_action(ctx->out, WX_GROUP_ALL, &ctx->nf_output_iface); + break; + case OFPP_CONTROLLER: + add_controller_action(ctx->out, oao); + break; + case OFPP_LOCAL: + add_output_action(ctx, XFLOWP_LOCAL); + break; + default: + xflow_port = ofp_port_to_xflow_port(ntohs(oao->port)); - if (xflow_port != ctx->flow->in_port) { ++ if (xflow_port != ctx->flow.in_port) { + add_output_action(ctx, xflow_port); + } + break; + } + + if (prev_nf_output_iface == NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_FLOOD; + } else if (ctx->nf_output_iface == NF_OUT_DROP) { + ctx->nf_output_iface = prev_nf_output_iface; + } else if (prev_nf_output_iface != NF_OUT_DROP && + ctx->nf_output_iface != NF_OUT_FLOOD) { 
+ ctx->nf_output_iface = NF_OUT_MULTI; + } +} + +static void +xlate_nicira_action(struct wx_xlate_ctx *ctx, + const struct nx_action_header *nah) +{ + const struct nx_action_resubmit *nar; ++ const struct nx_action_set_tunnel *nast; ++ union xflow_action *oa; + int subtype = ntohs(nah->subtype); + + assert(nah->vendor == htonl(NX_VENDOR_ID)); + switch (subtype) { + case NXAST_RESUBMIT: + nar = (const struct nx_action_resubmit *) nah; + xlate_table_action(ctx, ofp_port_to_xflow_port(ntohs(nar->in_port))); + break; + ++ case NXAST_SET_TUNNEL: ++ nast = (const struct nx_action_set_tunnel *) nah; ++ oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TUNNEL); ++ ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id; ++ break; ++ ++ /* If you add a new action here that modifies flow data, don't forget to ++ * update the flow key in ctx->flow at the same time. */ ++ + default: + VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype); + break; + } +} + +static void +do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx) +{ + struct actions_iterator iter; + const union ofp_action *ia; + const struct wdp_port *port; + - port = port_array_get(&ctx->wx->ports, ctx->flow->in_port); ++ port = port_array_get(&ctx->wx->ports, ctx->flow.in_port); + if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && - port->opp.config & (eth_addr_equals(ctx->flow->dl_dst, stp_eth_addr) ++ port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr) + ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) { + /* Drop this flow.
*/ + return; + } + + for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { + uint16_t type = ntohs(ia->type); + union xflow_action *oa; + + switch (type) { + case OFPAT_OUTPUT: + xlate_output_action(ctx, &ia->output); + break; + + case OFPAT_SET_VLAN_VID: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = ia->vlan_vid.vlan_vid & htons(VLAN_VID_MASK); + oa->dl_tci.mask = htons(VLAN_VID_MASK); ++ ctx->flow.dl_vlan = ia->vlan_vid.vlan_vid; + break; + + case OFPAT_SET_VLAN_PCP: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = htons((ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) + & VLAN_PCP_MASK); + oa->dl_tci.mask = htons(VLAN_PCP_MASK); ++ ++ if (ctx->flow.dl_vlan == htons(OFP_VLAN_NONE)) { ++ ctx->flow.dl_vlan = htons(0); ++ } ++ ctx->flow.dl_vlan_pcp = ia->vlan_pcp.vlan_pcp; + break; + + case OFPAT_STRIP_VLAN: + xflow_actions_add(ctx->out, XFLOWAT_STRIP_VLAN); ++ ctx->flow.dl_vlan = htons(OFP_VLAN_NONE); ++ ctx->flow.dl_vlan_pcp = 0; + break; + + case OFPAT_SET_DL_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_SRC); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); ++ memcpy(ctx->flow.dl_src, ++ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_DL_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_DST); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); ++ memcpy(ctx->flow.dl_dst, ++ ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_NW_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_SRC); - oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; ++ ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case OFPAT_SET_NW_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_DST); - oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; ++ ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case 
OFPAT_SET_NW_TOS: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_TOS); - oa->nw_tos.nw_tos = ia->nw_tos.nw_tos; ++ ctx->flow.nw_tos = oa->nw_tos.nw_tos = ia->nw_tos.nw_tos; + break; + + case OFPAT_SET_TP_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_SRC); - oa->tp_port.tp_port = ia->tp_port.tp_port; ++ ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + + case OFPAT_SET_TP_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_DST); - oa->tp_port.tp_port = ia->tp_port.tp_port; ++ ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + + case OFPAT_VENDOR: + xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + break; + + default: + VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type); + break; + } + } +} + +/* Returns true if 'flow' and 'actions' may be set up as a flow in the kernel. + * This is true most of the time, but we don't allow flows that would prevent + * DHCP replies from being seen by the local port to be set up in the + * kernel. + * + * We only need this, strictly speaking, when in-band control is turned on. 
*/ +static bool +wx_may_set_up(const flow_t *flow, const struct xflow_actions *actions) +{ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { + int i; + + for (i = 0; i < actions->n_actions; i++) { + const struct xflow_action_output *oao = &actions->actions[i].output; + if (oao->type == XFLOWAT_OUTPUT && oao->port == XFLOWP_LOCAL) { + return true; + } + } + return false; + } + + return true; +} + +static int +wx_xlate_actions(struct wx *wx, const union ofp_action *in, size_t n_in, + const flow_t *flow, const struct ofpbuf *packet, + struct xflow_actions *out, bool *may_set_up_flow) +{ + //tag_type no_tags = 0; + struct wx_xlate_ctx ctx; + COVERAGE_INC(wx_ofp2xflow); + xflow_actions_init(out); - ctx.flow = flow; ++ ctx.flow = *flow; + ctx.recurse = 0; + ctx.wx = wx; + ctx.packet = packet; + ctx.out = out; + //ctx.tags = tags ? tags : &no_tags; + ctx.may_set_up_flow = true; + ctx.nf_output_iface = NF_OUT_DROP; + do_xlate_actions(in, n_in, &ctx); + + if (may_set_up_flow) { + *may_set_up_flow = ctx.may_set_up_flow && wx_may_set_up(flow, out); + } +#if 0 + if (nf_output_iface) { + *nf_output_iface = ctx.nf_output_iface; + } +#endif + if (xflow_actions_overflow(out)) { + xflow_actions_init(out); + return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY); + } + return 0; +} + +static void +update_used(struct wx *wx) +{ + struct xflow_flow *flows; + size_t n_flows; + size_t i; + int error; + + error = xfif_flow_list_all(wx->xfif, &flows, &n_flows); + if (error) { + return; + } + + for (i = 0; i < n_flows; i++) { + struct xflow_flow *f = &flows[i]; + struct wx_rule *rule; + flow_t flow; + + xflow_key_to_flow(&f->key, &flow); + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, &flow)); + if (!rule || !rule->installed) { + COVERAGE_INC(wx_unexpected_rule); + xfif_flow_del(wx->xfif, f); + continue; + } + + wx_rule_update_time(wx, rule, &f->stats); + 
wx_rule_account(wx, rule, f->stats.n_bytes); + } + free(flows); +} + +static void +uninstall_idle_flow(struct wx *wx, struct wx_rule *rule) +{ + assert(rule->installed); + assert(!rule->wr.cr.flow.wildcards); + + if (rule->super) { + wx_rule_remove(wx, rule); + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +expire_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx *wx = wx_; + struct wx_rule *rule = wx_rule_cast(cls_rule); + long long int hard_expire, idle_expire, expire, now; + + hard_expire = (rule->wr.hard_timeout + ? rule->wr.created + rule->wr.hard_timeout * 1000 + : LLONG_MAX); + idle_expire = (rule->wr.idle_timeout + && (rule->super || list_is_empty(&rule->list)) + ? rule->used + rule->wr.idle_timeout * 1000 + : LLONG_MAX); + expire = MIN(hard_expire, idle_expire); + + now = time_msec(); + if (now < expire) { + if (rule->installed && now >= rule->used + 5000) { + uninstall_idle_flow(wx, rule); + } else if (!rule->wr.cr.flow.wildcards) { + //XXX active_timeout(wx, rule); + } + + return; + } + + COVERAGE_INC(wx_expired); + + /* Update stats. This code will be a no-op if the rule expired + * due to an idle timeout. */ + if (rule->wr.cr.flow.wildcards) { + struct wx_rule *subrule, *next; + LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) { + wx_rule_remove(wx, subrule); + } + } else { + wx_rule_uninstall(wx, rule); + } + +#if 0 /* XXX */ + if (!wx_rule_is_hidden(rule)) { + send_flow_removed(wx, rule, now, + (now >= hard_expire + ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT)); + } +#endif + wx_rule_remove(wx, rule); +} + +struct revalidate_cbdata { + struct wx *wx; + bool revalidate_all; /* Revalidate all exact-match rules? */ + bool revalidate_subrules; /* Revalidate all exact-match subrules? */ + //struct tag_set revalidate_set; /* Set of tags to revalidate. 
*/ +}; + +static bool +revalidate_rule(struct wx *wx, struct wx_rule *rule) +{ + const flow_t *flow = &rule->wr.cr.flow; + + COVERAGE_INC(wx_revalidate_rule); + if (rule->super) { + struct wx_rule *super; + super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow)); + if (!super) { + wx_rule_remove(wx, rule); + return false; + } else if (super != rule->super) { + COVERAGE_INC(wx_revalidate_moved); + list_remove(&rule->list); + list_push_back(&super->list, &rule->list); + rule->super = super; + rule->wr.hard_timeout = super->wr.hard_timeout; + rule->wr.idle_timeout = super->wr.idle_timeout; + rule->wr.created = super->wr.created; + rule->used = 0; + } + } + + wx_rule_update_actions(wx, rule); + return true; +} + +static void +revalidate_cb(struct cls_rule *sub_, void *cbdata_) +{ + struct wx_rule *sub = wx_rule_cast(sub_); + struct revalidate_cbdata *cbdata = cbdata_; + + if (cbdata->revalidate_all + || (cbdata->revalidate_subrules && sub->super) + /*|| (tag_set_intersects(&cbdata->revalidate_set, sub->tags))*/) { + revalidate_rule(cbdata->wx, sub); + } +} + +static void +wx_run_one(struct wx *wx) +{ + wx_port_run(wx); + + if (time_msec() >= wx->next_expiration) { + COVERAGE_INC(wx_expiration); + wx->next_expiration = time_msec() + 1000; + update_used(wx); + + classifier_for_each(&wx->cls, CLS_INC_ALL, expire_rule, wx); + + /* XXX account_checkpoint_cb */ + } + + if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) { + struct revalidate_cbdata cbdata; + cbdata.wx = wx; + cbdata.revalidate_all = false; + cbdata.revalidate_subrules = wx->need_revalidate; + //cbdata.revalidate_set = wx->revalidate_set; + //tag_set_init(&wx->revalidate_set); + COVERAGE_INC(wx_revalidate); + classifier_for_each(&wx->cls, CLS_INC_EXACT, revalidate_cb, &cbdata); + wx->need_revalidate = false; + } +} + +static void +wx_run(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_run_one(wx); + } + xf_run(); +} + +static void 
+wx_wait_one(struct wx *wx) +{ + xfif_port_poll_wait(wx->xfif); + netdev_monitor_poll_wait(wx->netdev_monitor); + if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) { + poll_immediate_wake(); + } else if (wx->next_expiration != LLONG_MAX) { + poll_timer_wait(wx->next_expiration - time_msec()); + } +} + +static void +wx_wait(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_wait_one(wx); + } + xf_wait(); +} + +static int wx_flow_flush(struct wdp *); + +static int +wx_enumerate(const struct wdp_class *wdp_class, struct svec *all_wdps) +{ + struct svec names = SVEC_EMPTY_INITIALIZER; + int error = xf_enumerate_names(wdp_class->type, &names); + svec_move(all_wdps, &names); + return error; +} + +static int +wx_open(const struct wdp_class *wdp_class, const char *name, bool create, + struct wdp **wdpp) +{ + struct xfif *xfif; + int error; + + error = (create + ? xfif_create_and_open(name, wdp_class->type, &xfif) + : xfif_open(name, wdp_class->type, &xfif)); + if (!error) { + struct wx *wx; + - wx = xmalloc(sizeof *wx); ++ wx = xzalloc(sizeof *wx); + list_push_back(&all_wx, &wx->list_node); + wdp_init(&wx->wdp, wdp_class, name, 0, 0); + wx->xfif = xfif; + classifier_init(&wx->cls); + wx->netdev_monitor = netdev_monitor_create(); + port_array_init(&wx->ports); + shash_init(&wx->port_by_name); + wx->next_expiration = time_msec() + 1000; + + wx_port_init(wx); + + *wdpp = &wx->wdp; + } + + return error; +} + +static void +wx_close(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + wx_flow_flush(wdp); + xfif_close(wx->xfif); + classifier_destroy(&wx->cls); + netdev_monitor_destroy(wx->netdev_monitor); + list_remove(&wx->list_node); + free(wx); +} + +static int +wx_get_all_names(const struct wdp *wdp, struct svec *all_names) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_all_names(wx->xfif, all_names); +} + +static int +wx_destroy(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + return 
xfif_delete(wx->xfif); +} + +static void +hton_ofp_phy_port(struct ofp_phy_port *opp) +{ + opp->port_no = htons(opp->port_no); + opp->config = htonl(opp->config); + opp->state = htonl(opp->state); + opp->curr = htonl(opp->curr); + opp->advertised = htonl(opp->advertised); + opp->supported = htonl(opp->supported); + opp->peer = htonl(opp->peer); +} + +static int +wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp) +{ + struct wx *wx = wx_cast(wdp); + struct ofp_switch_features *osf; + struct ofpbuf *buf; + unsigned int port_no; + struct wdp_port *port; + + buf = ofpbuf_new(sizeof *osf); + osf = ofpbuf_put_zeros(buf, sizeof *osf); + osf->n_tables = 2; + osf->capabilities = htonl(OFPC_ARP_MATCH_IP); + osf->actions = htonl((1u << OFPAT_OUTPUT) | + (1u << OFPAT_SET_VLAN_VID) | + (1u << OFPAT_SET_VLAN_PCP) | + (1u << OFPAT_STRIP_VLAN) | + (1u << OFPAT_SET_DL_SRC) | + (1u << OFPAT_SET_DL_DST) | + (1u << OFPAT_SET_NW_SRC) | + (1u << OFPAT_SET_NW_DST) | + (1u << OFPAT_SET_NW_TOS) | + (1u << OFPAT_SET_TP_SRC) | + (1u << OFPAT_SET_TP_DST)); + + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp)); + } + + *featuresp = buf; + return 0; +} + +static void +count_subrules(struct cls_rule *cls_rule, void *n_subrules_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + int *n_subrules = n_subrules_; + + if (rule->super) { + (*n_subrules)++; + } +} + +static int +wx_get_stats(const struct wdp *wdp, struct wdp_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_stats xflow_stats; + int n_subrules; + int error; + + error = xfif_get_xf_stats(wx->xfif, &xflow_stats); + + n_subrules = 0; + classifier_for_each(&wx->cls, CLS_INC_EXACT, count_subrules, &n_subrules); + + stats->exact.n_flows = classifier_count_exact(&wx->cls) - n_subrules; + stats->exact.cur_capacity = xflow_stats.cur_capacity; + stats->exact.max_capacity = MIN(WX_MAX_EXACT, xflow_stats.max_capacity); + stats->exact.n_hit = 
xflow_stats.n_hit; + stats->exact.n_missed = xflow_stats.n_missed; + stats->exact.n_lost = xflow_stats.n_lost; + + stats->wild.n_flows = classifier_count_wild(&wx->cls); + stats->wild.cur_capacity = WX_MAX_WILD; + stats->wild.max_capacity = WX_MAX_WILD; + stats->wild.n_hit = 0; /* XXX */ + stats->wild.n_missed = 0; /* XXX */ + stats->wild.n_lost = 0; /* XXX */ + + stats->n_ports = xflow_stats.n_ports; + stats->max_ports = xflow_stats.max_ports; + + stats->n_frags = xflow_stats.n_frags; + + stats->max_miss_queue = xflow_stats.max_miss_queue; + stats->max_action_queue = xflow_stats.max_action_queue; + stats->max_sflow_queue = xflow_stats.max_sflow_queue; + + return error; +} + +static int +wx_get_drop_frags(const struct wdp *wdp, bool *drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_set_drop_frags(struct wdp *wdp, bool drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_port_add(struct wdp *wdp, const char *devname, + bool internal, uint16_t *port_no) +{ + struct wx *wx = wx_cast(wdp); + uint16_t xflow_flags = internal ? 
XFLOW_PORT_INTERNAL : 0; + return xfif_port_add(wx->xfif, devname, xflow_flags, port_no); +} + +static int +wx_port_del(struct wdp *wdp, uint16_t port_no) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_port_del(wx->xfif, port_no); +} + +static int +wx_answer_port_query(const struct wdp_port *port, struct wdp_port *portp) +{ + if (port) { + wdp_port_copy(portp, port); + return 0; + } else { + return ENOENT; + } +} + +static int +wx_port_query_by_number(const struct wdp *wdp, uint16_t port_no, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + const struct wdp_port *port; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + return wx_answer_port_query(port, portp); +} + +static int +wx_port_query_by_name(const struct wdp *wdp, const char *devname, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + + return wx_answer_port_query(shash_find_data(&wx->port_by_name, devname), + portp); +} + +static int +wx_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *port; + uint32_t changes; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + if (!port) { + return ENOENT; + } + changes = config ^ port->opp.config; + + if (changes & OFPPC_PORT_DOWN) { + int error; + if (config & OFPPC_PORT_DOWN) { + error = netdev_turn_flags_off(port->netdev, NETDEV_UP, true); + } else { + error = netdev_turn_flags_on(port->netdev, NETDEV_UP, true); + } + if (!error) { + port->opp.config ^= OFPPC_PORT_DOWN; + } + } + +#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD) + if (changes & REVALIDATE_BITS) { + COVERAGE_INC(wx_costly_flags); + port->opp.config ^= changes & REVALIDATE_BITS; + wx->need_revalidate = true; + } +#undef REVALIDATE_BITS + + if (changes & OFPPC_NO_FLOOD) { + port->opp.config ^= OFPPC_NO_FLOOD; + wx_port_refresh_groups(wx); + } + + if (changes & OFPPC_NO_PACKET_IN) { + port->opp.config ^= OFPPC_NO_PACKET_IN; + } + + 
return 0; +} + +static int +wx_port_list(const struct wdp *wdp, struct wdp_port **portsp, size_t *n_portsp) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *ports, *port; + unsigned int port_no; + size_t n_ports, i; + + *n_portsp = n_ports = port_array_count(&wx->ports); + *portsp = ports = xmalloc(n_ports * sizeof *ports); + i = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + wdp_port_copy(&ports[i++], port); + } + assert(i == n_ports); + + return 0; +} + +static int +wx_port_poll(const struct wdp *wdp, char **devnamep) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_port_poll(wx->xfif, devnamep); +} + +static void +wx_port_poll_wait(const struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_port_poll_wait(wx->xfif); +} + +static struct wdp_rule * +wx_flow_get(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, flow)); + return rule && !wx_rule_is_hidden(rule) ? 
&rule->wr : NULL; +} + +static struct wdp_rule * +wx_flow_match(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + + rule = wx_rule_cast(classifier_lookup(&wx->cls, flow)); + if (rule) { + if (wx_rule_is_hidden(rule)) { + rule = rule->super; + } + return &rule->wr; + } else { + return NULL; + } +} + +struct wx_for_each_thunk_aux { + wdp_flow_cb_func *client_callback; + void *client_aux; +}; + +static void +wx_for_each_thunk(struct cls_rule *cls_rule, void *aux_) +{ + struct wx_for_each_thunk_aux *aux = aux_; + struct wx_rule *rule = wx_rule_cast(cls_rule); + + if (!wx_rule_is_hidden(rule)) { + aux->client_callback(&rule->wr, aux->client_aux); + } +} + +static void +wx_flow_for_each_match(const struct wdp *wdp, const flow_t *target, + int include, + wdp_flow_cb_func *client_callback, void *client_aux) +{ + struct wx *wx = wx_cast(wdp); + struct wx_for_each_thunk_aux aux; + + aux.client_callback = client_callback; + aux.client_aux = client_aux; + classifier_for_each_match(&wx->cls, target, include, + wx_for_each_thunk, &aux); +} + +/* Obtains statistic counters for 'rule' within 'wx' and stores them into + * '*stats'. If 'rule' is a wildcarded rule, the returned statistic include + * statistics for all of 'rule''s subrules. */ +static void +query_stats(struct wx *wx, struct wx_rule *rule, struct wdp_flow_stats *stats) +{ + struct wx_rule *subrule; + struct xflow_flow *xflow_flows; + size_t n_xflow_flows; + + /* Start from historical data for 'rule' itself that are no longer tracked + * by the datapath. This counts, for example, subrules that have + * expired. */ + stats->n_packets = rule->packet_count; + stats->n_bytes = rule->byte_count; + stats->inserted = rule->wr.created; + stats->used = LLONG_MIN; + stats->tcp_flags = 0; + stats->ip_tos = 0; + + /* Prepare to ask the datapath for statistics on 'rule', or if it is + * wildcarded then on all of its subrules. 
+ * + * Also, add any statistics that are not tracked by the datapath for each + * subrule. This includes, for example, statistics for packets that were + * executed "by hand" by ofproto via xfif_execute() but must be accounted + * to a flow. */ + n_xflow_flows = rule->wr.cr.flow.wildcards ? list_size(&rule->list) : 1; + xflow_flows = xzalloc(n_xflow_flows * sizeof *xflow_flows); + if (rule->wr.cr.flow.wildcards) { + size_t i = 0; + LIST_FOR_EACH (subrule, struct wx_rule, list, &rule->list) { + xflow_key_from_flow(&xflow_flows[i++].key, &subrule->wr.cr.flow); + stats->n_packets += subrule->packet_count; + stats->n_bytes += subrule->byte_count; + } + } else { + xflow_key_from_flow(&xflow_flows[0].key, &rule->wr.cr.flow); + } + + /* Fetch up-to-date statistics from the datapath and add them in. */ + if (!xfif_flow_get_multiple(wx->xfif, xflow_flows, n_xflow_flows)) { + size_t i; + for (i = 0; i < n_xflow_flows; i++) { + struct xflow_flow *xflow_flow = &xflow_flows[i]; + long long int used; + + stats->n_packets += xflow_flow->stats.n_packets; + stats->n_bytes += xflow_flow->stats.n_bytes; + used = xflow_flow_stats_to_msec(&xflow_flow->stats); + if (used > stats->used) { + stats->used = used; + if (xflow_flow->key.dl_type == htons(ETH_TYPE_IP) + && xflow_flow->key.nw_proto == IP_TYPE_TCP) { + stats->ip_tos = xflow_flow->stats.ip_tos; + } + } + stats->tcp_flags |= xflow_flow->stats.tcp_flags; + } + } + free(xflow_flows); +} + +static int +wx_flow_get_stats(const struct wdp *wdp, + const struct wdp_rule *wdp_rule, + struct wdp_flow_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + query_stats(wx, rule, stats); + return 0; +} + +static bool +wx_flow_overlaps(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + + /* XXX overlap with a subrule? 
*/ + return classifier_rule_overlaps(&wx->cls, flow); +} + +static int +wx_flow_put(struct wdp *wdp, const struct wdp_flow_put *put, + struct wdp_flow_stats *old_stats, struct wdp_rule **rulep) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, put->flow)); + if (rule && wx_rule_is_hidden(rule)) { + rule = NULL; + } + + if (rule) { + if (!(put->flags & WDP_PUT_MODIFY)) { + return EEXIST; + } + } else { + if (!(put->flags & WDP_PUT_CREATE)) { + return EINVAL; + } + if ((put->flow->wildcards + ? classifier_count_wild(&wx->cls) >= WX_MAX_WILD + : classifier_count_exact(&wx->cls) >= WX_MAX_EXACT)) { + /* XXX subrules should not count against exact-match limit */ + return ENOBUFS; + } + } + + rule = wx_rule_create(NULL, put->actions, put->n_actions, + put->idle_timeout, put->hard_timeout); - cls_rule_from_flow(&rule->wr.cr, put->flow); ++ cls_rule_from_flow(put->flow, &rule->wr.cr); + wx_rule_insert(wx, rule, NULL, 0); + + if (old_stats) { + /* XXX */ + memset(old_stats, 0, sizeof *old_stats); + } + if (rulep) { + *rulep = &rule->wr; + } + + return 0; +} + +static int +wx_flow_delete(struct wdp *wdp, struct wdp_rule *wdp_rule, + struct wdp_flow_stats *final_stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + wx_rule_remove(wx, rule); + if (final_stats) { + memset(final_stats, 0, sizeof *final_stats); /* XXX */ + } + return 0; +} + +static void +wx_flush_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + struct wx *wx = wx_; + + /* Mark the flow as not installed, even though it might really be + * installed, so that wx_rule_remove() doesn't bother trying to uninstall + * it. There is no point in uninstalling it individually since we are + * about to blow away all the flows with xfif_flow_flush(). 
*/ + rule->installed = false; + + wx_rule_remove(wx, rule); +} + +static int +wx_flow_flush(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + COVERAGE_INC(wx_flow_flush); + classifier_for_each(&wx->cls, CLS_INC_ALL, wx_flush_rule, wx); + xfif_flow_flush(wx->xfif); + return 0; +} + +static int +wx_execute(struct wdp *wdp, uint16_t in_port, + const union ofp_action actions[], int n_actions, + const struct ofpbuf *packet) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_actions xflow_actions; + flow_t flow; + int error; + - flow_extract((struct ofpbuf *) packet, in_port, &flow); ++ flow_extract((struct ofpbuf *) packet, 0, in_port, &flow); + error = wx_xlate_actions(wx, actions, n_actions, &flow, packet, + &xflow_actions, NULL); + if (error) { + return error; + } + xfif_execute(wx->xfif, ofp_port_to_xflow_port(in_port), + xflow_actions.actions, xflow_actions.n_actions, packet); + return 0; +} + +static int +wx_flow_inject(struct wdp *wdp, struct wdp_rule *wdp_rule, + uint16_t in_port, const struct ofpbuf *packet) +{ + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + int error; + + error = wx_execute(wdp, in_port, rule->wr.actions, rule->wr.n_actions, + packet); + if (!error) { + rule->packet_count++; + rule->byte_count += packet->size; + rule->used = time_msec(); + } + return error; +} + +static int +wx_recv_get_mask(const struct wdp *wdp, int *listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + int error; + + error = xfif_recv_get_mask(wx->xfif, &xflow_listen_mask); + if (!error) { + *listen_mask = 0; + if (xflow_listen_mask & XFLOWL_MISS) { + *listen_mask |= 1 << WDP_CHAN_MISS; + } + if (xflow_listen_mask & XFLOWL_ACTION) { + *listen_mask |= 1 << WDP_CHAN_ACTION; + } + if (xflow_listen_mask & XFLOWL_SFLOW) { + *listen_mask |= 1 << WDP_CHAN_SFLOW; + } + } + return error; +} + +static int +wx_recv_set_mask(struct wdp *wdp, int listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + + xflow_listen_mask = 0; + if 
(listen_mask & (1 << WDP_CHAN_MISS)) { + xflow_listen_mask |= XFLOWL_MISS; + } + if (listen_mask & (1 << WDP_CHAN_ACTION)) { + xflow_listen_mask |= XFLOWL_ACTION; + } + if (listen_mask & (1 << WDP_CHAN_SFLOW)) { + xflow_listen_mask |= XFLOWL_SFLOW; + } + + return xfif_recv_set_mask(wx->xfif, xflow_listen_mask); +} + +static int +wx_get_sflow_probability(const struct wdp *wdp, uint32_t *probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_sflow_probability(wx->xfif, probability); +} + +static int +wx_set_sflow_probability(struct wdp *wdp, uint32_t probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_sflow_probability(wx->xfif, probability); +} + +static int +wx_translate_xflow_msg(struct xflow_msg *msg, struct ofpbuf *payload, + struct wdp_packet *packet) +{ + packet->in_port = xflow_port_to_ofp_port(msg->port); + packet->send_len = 0; ++ packet->tun_id = 0; + + switch (msg->type) { + case _XFLOWL_MISS_NR: + packet->channel = WDP_CHAN_MISS; + packet->payload = payload; ++ packet->tun_id = msg->arg; + return 0; + + case _XFLOWL_ACTION_NR: + packet->channel = WDP_CHAN_ACTION; + packet->payload = payload; + packet->send_len = msg->arg; + return 0; + + case _XFLOWL_SFLOW_NR: + /* XXX */ + ofpbuf_delete(payload); + return ENOSYS; + + default: + VLOG_WARN_RL(&rl, "received XFLOW message of unexpected type %"PRIu32, + msg->type); + ofpbuf_delete(payload); + return ENOSYS; + } +} + +static const uint8_t * +get_local_mac(const struct wx *wx) +{ + const struct wdp_port *port = port_array_get(&wx->ports, XFLOWP_LOCAL); + return port ? port->opp.hw_addr : NULL; +} + +/* Returns true if 'packet' is a DHCP reply to the local port. Such a reply + * should be sent to the local port regardless of the flow table. + * + * We only need this, strictly speaking, when in-band control is turned on. 
*/ +static bool +wx_is_local_dhcp_reply(const struct wx *wx, + const flow_t *flow, const struct ofpbuf *packet) +{ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT) + && packet->l7) + { + const uint8_t *local_mac = get_local_mac(wx); + struct dhcp_header *dhcp = ofpbuf_at( + packet, (char *)packet->l7 - (char *)packet->data, sizeof *dhcp); + return dhcp && local_mac && eth_addr_equals(dhcp->chaddr, local_mac); + } + + return false; +} + +static bool +wx_explode_rule(struct wx *wx, struct xflow_msg *msg, struct ofpbuf *payload) +{ + struct wx_rule *rule; + flow_t flow; + - flow_extract(payload, xflow_port_to_ofp_port(msg->port), &flow); ++ flow_extract(payload, 0, xflow_port_to_ofp_port(msg->port), &flow); + + if (wx_is_local_dhcp_reply(wx, &flow, payload)) { + union xflow_action action; + + memset(&action, 0, sizeof(action)); + action.output.type = XFLOWAT_OUTPUT; + action.output.port = XFLOWP_LOCAL; + xfif_execute(wx->xfif, msg->port, &action, 1, payload); + } + + rule = wx_rule_lookup_valid(wx, &flow); + if (!rule) { + return false; + } + + if (rule->wr.cr.flow.wildcards) { + rule = wx_rule_create_subrule(wx, rule, &flow); + wx_rule_make_actions(wx, rule, payload); + } else { + if (!rule->may_install) { + /* The rule is not installable, that is, we need to process every + * packet, so process the current packet and set its actions into + * 'subrule'. */ + wx_rule_make_actions(wx, rule, payload); + } else { + /* XXX revalidate rule if it needs it */ + } + } + + wx_rule_execute(wx, rule, payload, &flow); + wx_rule_reinstall(wx, rule); + + return true; +} + +static int +wx_recv(struct wdp *wdp, struct wdp_packet *packet) +{ + struct wx *wx = wx_cast(wdp); + int i; + + /* XXX need to avoid 50*50 potential cost for caller. 
*/ + for (i = 0; i < 50; i++) { + struct xflow_msg *msg; + struct ofpbuf *buf; + int error; + + error = xfif_recv(wx->xfif, &buf); + if (error) { + return error; + } + + msg = ofpbuf_pull(buf, sizeof *msg); + if (msg->type != _XFLOWL_MISS_NR || !wx_explode_rule(wx, msg, buf)) { + return wx_translate_xflow_msg(msg, buf, packet); + } + ofpbuf_delete(buf); + } + return EAGAIN; +} + +static void +wx_recv_wait(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_recv_wait(wx->xfif); +} + +static void wx_port_update(struct wx *, const char *devname); +static void wx_port_reinit(struct wx *); + +static void +wx_port_process_change(struct wx *wx, int error, char *devname) +{ + if (error == ENOBUFS) { + wx_port_reinit(wx); + } else if (!error) { + wx_port_update(wx, devname); + free(devname); + } +} + +static void +wx_port_run(struct wx *wx) +{ + char *devname; + int error; + + while ((error = xfif_port_poll(wx->xfif, &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname); + } + while ((error = netdev_monitor_poll(wx->netdev_monitor, + &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname); + } +} + +static size_t +wx_port_refresh_group(struct wx *wx, unsigned int group) +{ + uint16_t *ports; + size_t n_ports; + struct wdp_port *port; + unsigned int port_no; + + assert(group == WX_GROUP_ALL || group == WX_GROUP_FLOOD); + + ports = xmalloc(port_array_count(&wx->ports) * sizeof *ports); + n_ports = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + if (group == WX_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { + ports[n_ports++] = port_no; + } + } + xfif_port_group_set(wx->xfif, group, ports, n_ports); + free(ports); + + return n_ports; +} + +static void +wx_port_refresh_groups(struct wx *wx) +{ + wx_port_refresh_group(wx, WX_GROUP_FLOOD); + wx_port_refresh_group(wx, WX_GROUP_ALL); +} + +static void +wx_port_reinit(struct wx *wx) +{ + struct svec devnames; + struct wdp_port *wdp_port; + unsigned int port_no; + struct xflow_port 
*xflow_ports; + size_t n_xflow_ports; + size_t i; + + svec_init(&devnames); + PORT_ARRAY_FOR_EACH (wdp_port, &wx->ports, port_no) { + svec_add (&devnames, (char *) wdp_port->opp.name); + } + xfif_port_list(wx->xfif, &xflow_ports, &n_xflow_ports); + for (i = 0; i < n_xflow_ports; i++) { + svec_add(&devnames, xflow_ports[i].devname); + } + free(xflow_ports); + + svec_sort_unique(&devnames); + for (i = 0; i < devnames.n; i++) { + wx_port_update(wx, devnames.names[i]); + } + svec_destroy(&devnames); + + wx_port_refresh_groups(wx); +} + +static struct wdp_port * +make_wdp_port(const struct xflow_port *xflow_port) +{ + struct netdev_options netdev_options; + enum netdev_flags flags; + struct wdp_port *wdp_port; + struct netdev *netdev; + bool carrier; + int error; + + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = xflow_port->devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; + netdev_options.may_create = true; + netdev_options.may_open = true; + + error = netdev_open(&netdev_options, &netdev); + if (error) { + VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s " + "cannot be opened (%s)", + xflow_port->devname, xflow_port->port, + xflow_port->devname, strerror(error)); + return NULL; + } + + wdp_port = xmalloc(sizeof *wdp_port); + wdp_port->netdev = netdev; + wdp_port->opp.port_no = xflow_port_to_ofp_port(xflow_port->port); + netdev_get_etheraddr(netdev, wdp_port->opp.hw_addr); + strncpy((char *) wdp_port->opp.name, xflow_port->devname, + sizeof wdp_port->opp.name); + wdp_port->opp.name[sizeof wdp_port->opp.name - 1] = '\0'; + + netdev_get_flags(netdev, &flags); + wdp_port->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN; + + netdev_get_carrier(netdev, &carrier); + wdp_port->opp.state = carrier ? 
0 : OFPPS_LINK_DOWN; + + netdev_get_features(netdev, + &wdp_port->opp.curr, &wdp_port->opp.advertised, + &wdp_port->opp.supported, &wdp_port->opp.peer); + + wdp_port->devname = xstrdup(xflow_port->devname); + wdp_port->internal = (xflow_port->flags & XFLOW_PORT_INTERNAL) != 0; + return wdp_port; +} + +static bool +wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port) +{ + if (port_array_get(&wx->ports, xflow_port->port)) { + VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath", + xflow_port->port); + return true; + } else if (shash_find(&wx->port_by_name, xflow_port->devname)) { + VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath", + xflow_port->devname); + return true; + } else { + return false; + } +} + +static int +wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_) +{ + const struct ofp_phy_port *a = &a_->opp; + const struct ofp_phy_port *b = &b_->opp; + + BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. 
*/ + return (a->port_no == b->port_no + && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) + && !strcmp((char *) a->name, (char *) b->name) + && a->state == b->state + && a->config == b->config + && a->curr == b->curr + && a->advertised == b->advertised + && a->supported == b->supported + && a->peer == b->peer); +} + +static void +wx_port_install(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + const char *netdev_name = (const char *) wdp_port->opp.name; + + netdev_monitor_add(wx->netdev_monitor, wdp_port->netdev); + port_array_set(&wx->ports, xflow_port, wdp_port); + shash_add(&wx->port_by_name, netdev_name, wdp_port); +} + +static void +wx_port_remove(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + + netdev_monitor_remove(wx->netdev_monitor, wdp_port->netdev); + port_array_set(&wx->ports, xflow_port, NULL); + shash_delete(&wx->port_by_name, + shash_find(&wx->port_by_name, (char *) wdp_port->opp.name)); +} + +static void +wx_port_free(struct wdp_port *wdp_port) +{ + if (wdp_port) { + netdev_close(wdp_port->netdev); + free(wdp_port); + } +} + +static void +wx_port_update(struct wx *wx, const char *devname) +{ + struct xflow_port xflow_port; + struct wdp_port *old_wdp_port; + struct wdp_port *new_wdp_port; + int error; + + COVERAGE_INC(wx_update_port); + + /* Query the datapath for port information. */ + error = xfif_port_query_by_name(wx->xfif, devname, &xflow_port); + + /* Find the old wdp_port. */ + old_wdp_port = shash_find_data(&wx->port_by_name, devname); + if (!error) { + if (!old_wdp_port) { + /* There's no port named 'devname' but there might be a port with + * the same port number. This could happen if a port is deleted + * and then a new one added in its place very quickly, or if a port + * is renamed. 
In the former case we want to send an OFPPR_DELETE + * and an OFPPR_ADD, and in the latter case we want to send a + * single OFPPR_MODIFY. We can distinguish the cases by comparing + * the old port's ifindex against the new port, or perhaps less + * reliably but more portably by comparing the old port's MAC + * against the new port's MAC. However, this code isn't that smart + * and always sends an OFPPR_MODIFY (XXX). */ + old_wdp_port = port_array_get(&wx->ports, xflow_port.port); + } + } else if (error != ENOENT && error != ENODEV) { + VLOG_WARN_RL(&rl, "xfif_port_query_by_name returned unexpected error " + "%s", strerror(error)); + return; + } + + /* Create a new wdp_port. */ + new_wdp_port = !error ? make_wdp_port(&xflow_port) : NULL; + + /* Eliminate a few pathological cases. */ + if (!old_wdp_port && !new_wdp_port) { + return; + } else if (old_wdp_port && new_wdp_port) { + /* Most of the 'config' bits are OpenFlow soft state, but + * OFPPC_PORT_DOWN is maintained by the kernel. So transfer the + * OpenFlow bits from old_wdp_port. (make_wdp_port() only sets + * OFPPC_PORT_DOWN and leaves the other bits 0.) */ + new_wdp_port->opp.config |= old_wdp_port->opp.config & ~OFPPC_PORT_DOWN; + + if (wdp_port_equal(old_wdp_port, new_wdp_port)) { + /* False alarm--no change. */ + wx_port_free(new_wdp_port); + return; + } + } + + /* Now deal with the normal cases. 
*/ + if (old_wdp_port) { + wx_port_remove(wx, old_wdp_port); + } + if (new_wdp_port) { + wx_port_install(wx, new_wdp_port); + } + wx_port_free(old_wdp_port); +} + +static int +wx_port_init(struct wx *wx) +{ + struct xflow_port *ports; + size_t n_ports; + size_t i; + int error; + + error = xfif_port_list(wx->xfif, &ports, &n_ports); + if (error) { + return error; + } + + for (i = 0; i < n_ports; i++) { + const struct xflow_port *xflow_port = &ports[i]; + if (!wx_port_conflicts(wx, xflow_port)) { + struct wdp_port *wdp_port = make_wdp_port(xflow_port); + if (wdp_port) { + wx_port_install(wx, wdp_port); + } + } + } + free(ports); + wx_port_refresh_groups(wx); + return 0; +} + +void +wdp_xflow_register(void) +{ + static const struct wdp_class wdp_xflow_class = { + NULL, /* name */ + wx_run, + wx_wait, + wx_enumerate, + wx_open, + wx_close, + wx_get_all_names, + wx_destroy, + wx_get_features, + wx_get_stats, + wx_get_drop_frags, + wx_set_drop_frags, + wx_port_add, + wx_port_del, + wx_port_query_by_number, + wx_port_query_by_name, + wx_port_list, + wx_port_set_config, + wx_port_poll, + wx_port_poll_wait, + wx_flow_get, + wx_flow_match, + wx_flow_for_each_match, + wx_flow_get_stats, + wx_flow_overlaps, + wx_flow_put, + wx_flow_delete, + wx_flow_flush, + wx_flow_inject, + wx_execute, + wx_recv_get_mask, + wx_recv_set_mask, + wx_get_sflow_probability, + wx_set_sflow_probability, + wx_recv, + wx_recv_wait, + }; + + static bool inited = false; + + struct svec types; + const char *type; + bool registered; + int i; + + if (inited) { + return; + } + inited = true; + + svec_init(&types); + xf_enumerate_types(&types); + + registered = false; + SVEC_FOR_EACH (i, type, &types) { + struct wdp_class *class; + + class = xmalloc(sizeof *class); + *class = wdp_xflow_class; + class->type = xstrdup(type); + if (registered) { + class->run = NULL; + class->wait = NULL; + } + if (!wdp_register_provider(class)) { + registered = true; + } + } + + svec_destroy(&types); +} diff --cc ofproto/wdp.c 
index 31f129238,000000000..7e04110c2 mode 100644,000000..100644 --- a/ofproto/wdp.c +++ b/ofproto/wdp.c @@@ -1,1035 -1,0 +1,1048 @@@ +/* + * Copyright (c) 2008, 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "wdp-provider.h" + +#include +#include +#include +#include +#include +#include + +#include "coverage.h" +#include "dynamic-string.h" +#include "flow.h" +#include "netdev.h" +#include "netlink.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "shash.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "valgrind.h" +#include "wdp-xflow.h" + +#include "vlog.h" +#define THIS_MODULE VLM_wdp + +/* wdp_rule */ + +/* Initializes a new 'struct wdp_rule', copying in the 'n_actions' elements of + * 'actions'. + * + * The caller is responsible for initializing 'rule->cr'. */ +void +wdp_rule_init(struct wdp_rule *rule, const union ofp_action *actions, + size_t n_actions) +{ + rule->actions = xmemdup(actions, n_actions * sizeof *actions); + rule->n_actions = n_actions; + rule->created = time_msec(); + rule->idle_timeout = 0; + rule->hard_timeout = 0; + rule->client_data = NULL; +} + +/* Frees the data in 'rule'. 
*/ +void +wdp_rule_uninit(struct wdp_rule *rule) +{ + free(rule->actions); +} + +/* wdp */ + +static const struct wdp_class *base_wdp_classes[] = { + /* XXX none yet */ +}; + +struct registered_wdp_class { + const struct wdp_class *wdp_class; + int refcount; +}; + +static struct shash wdp_classes = SHASH_INITIALIZER(&wdp_classes); + +/* Rate limit for individual messages going to or from the datapath, output at + * DBG level. This is very high because, if these are enabled, it is because + * we really need to see them. */ +static struct vlog_rate_limit wdpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); + +/* Not really much point in logging many wdp errors. */ +static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); + +static void log_operation(const struct wdp *, const char *operation, + int error); + +static void +wdp_initialize(void) +{ + static int status = -1; + + if (status < 0) { + int i; + + status = 0; + for (i = 0; i < ARRAY_SIZE(base_wdp_classes); i++) { + wdp_register_provider(base_wdp_classes[i]); + } + wdp_xflow_register(); + } +} + +/* Performs periodic work needed by all the various kinds of wdps. + * + * If your program opens any wdps, it must call both this function and + * netdev_run() within its main poll loop. */ +void +wdp_run(void) +{ + struct shash_node *node; + SHASH_FOR_EACH (node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + if (registered_class->wdp_class->run) { + registered_class->wdp_class->run(); + } + } +} + +/* Arranges for poll_block() to wake up when wdp_run() needs to be called. + * + * If your program opens any wdps, it must call both this function and + * netdev_wait() within its main poll loop. 
*/ +void +wdp_wait(void) +{ + struct shash_node *node; + SHASH_FOR_EACH(node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + if (registered_class->wdp_class->wait) { + registered_class->wdp_class->wait(); + } + } +} + +/* Registers a new datapath provider. After successful registration, new + * datapaths of that type can be opened using wdp_open(). */ +int +wdp_register_provider(const struct wdp_class *new_class) +{ + struct registered_wdp_class *registered_class; + + if (shash_find(&wdp_classes, new_class->type)) { + VLOG_WARN("attempted to register duplicate datapath provider: %s", + new_class->type); + return EEXIST; + } + + registered_class = xmalloc(sizeof *registered_class); + registered_class->wdp_class = new_class; + registered_class->refcount = 0; + + shash_add(&wdp_classes, new_class->type, registered_class); + + return 0; +} + +/* Unregisters a datapath provider. 'type' must have been previously + * registered and not currently be in use by any wdps. After unregistration + * new datapaths of that type cannot be opened using wdp_open(). */ +int +wdp_unregister_provider(const char *type) +{ + struct shash_node *node; + struct registered_wdp_class *registered_class; + + node = shash_find(&wdp_classes, type); + if (!node) { + VLOG_WARN("attempted to unregister a datapath provider that is not " + "registered: %s", type); + return EAFNOSUPPORT; + } + + registered_class = node->data; + if (registered_class->refcount) { + VLOG_WARN("attempted to unregister in use datapath provider: %s", + type); + return EBUSY; + } + + shash_delete(&wdp_classes, node); + free(registered_class); + + return 0; +} + +/* Clears 'types' and enumerates the types of all currently registered wdp + * providers into it. The caller must first initialize the svec. 
*/ +void +wdp_enumerate_types(struct svec *types) +{ + struct shash_node *node; + + wdp_initialize(); + svec_clear(types); + + SHASH_FOR_EACH (node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + svec_add(types, registered_class->wdp_class->type); + } +} + +/* Clears 'names' and enumerates the names of all known created datapaths + * with the given 'type'. The caller must first initialize the svec. Returns 0 + * if successful, otherwise a positive errno value. + * + * Some kinds of datapaths might not be practically enumerable. This is not + * considered an error. */ +int +wdp_enumerate_names(const char *type, struct svec *names) +{ + const struct registered_wdp_class *registered_class; + const struct wdp_class *wdp_class; + int error; + + wdp_initialize(); + svec_clear(names); + + registered_class = shash_find_data(&wdp_classes, type); + if (!registered_class) { + VLOG_WARN("could not enumerate unknown type: %s", type); + return EAFNOSUPPORT; + } + + wdp_class = registered_class->wdp_class; + error = (wdp_class->enumerate + ? wdp_class->enumerate(wdp_class, names) + : 0); + + if (error) { + VLOG_WARN("failed to enumerate %s datapaths: %s", wdp_class->type, + strerror(error)); + } + + return error; +} + +/* Parses 'datapath_name', which is of the form type@name, into its + * component pieces. 'name' and 'type' must be freed by the caller. 
*/ +void +wdp_parse_name(const char *datapath_name_, char **name, char **type) +{ + char *datapath_name = xstrdup(datapath_name_); + char *separator; + + separator = strchr(datapath_name, '@'); + if (separator) { + *separator = '\0'; + *type = datapath_name; + *name = xstrdup(separator + 1); + } else { + *name = datapath_name; + *type = NULL; + } +} + +static int +do_open(const char *name, const char *type, bool create, struct wdp **wdpp) +{ + struct wdp *wdp = NULL; + int error; + struct registered_wdp_class *registered_class; + + wdp_initialize(); + + if (!type || *type == '\0') { + type = "system"; + } + + registered_class = shash_find_data(&wdp_classes, type); + if (!registered_class) { + VLOG_WARN("could not create datapath %s of unknown type %s", name, + type); + error = EAFNOSUPPORT; + goto exit; + } + + error = registered_class->wdp_class->open(registered_class->wdp_class, + name, create, &wdp); + if (!error) { + registered_class->refcount++; + } + +exit: + *wdpp = error ? NULL : wdp; + return error; +} + +/* Tries to open an existing datapath named 'name' and type 'type'. Will fail + * if no datapath with 'name' and 'type' exists. 'type' may be either NULL or + * the empty string to specify the default system type. Returns 0 if + * successful, otherwise a positive errno value. On success stores a pointer + * to the datapath in '*wdpp', otherwise a null pointer. */ +int +wdp_open(const char *name, const char *type, struct wdp **wdpp) +{ + return do_open(name, type, false, wdpp); +} + +/* Tries to create and open a new datapath with the given 'name' and 'type'. + * 'type' may be either NULL or the empty string to specify the default system + * type. Will fail if a datapath with 'name' and 'type' already exists. + * Returns 0 if successful, otherwise a positive errno value. On success + * stores a pointer to the datapath in '*wdpp', otherwise a null pointer. 
*/ +int +wdp_create(const char *name, const char *type, struct wdp **wdpp) +{ + return do_open(name, type, true, wdpp); +} + +/* Tries to open a datapath with the given 'name' and 'type', creating it if it + * does not exist. 'type' may be either NULL or the empty string to specify + * the default system type. Returns 0 if successful, otherwise a positive + * errno value. On success stores a pointer to the datapath in '*wdpp', + * otherwise a null pointer. */ +int +wdp_create_and_open(const char *name, const char *type, struct wdp **wdpp) +{ + int error; + + error = wdp_create(name, type, wdpp); + if (error == EEXIST || error == EBUSY) { + error = wdp_open(name, type, wdpp); + if (error) { + VLOG_WARN("datapath %s already exists but cannot be opened: %s", + name, strerror(error)); + } + } else if (error) { + VLOG_WARN("failed to create datapath %s: %s", name, strerror(error)); + } + return error; +} + +/* Closes and frees the connection to 'wdp'. Does not destroy the wdp + * itself; call wdp_delete() first, instead, if that is desirable. */ +void +wdp_close(struct wdp *wdp) +{ + if (wdp) { + struct registered_wdp_class *registered_class; + + registered_class = shash_find_data(&wdp_classes, + wdp->wdp_class->type); + assert(registered_class); + assert(registered_class->refcount); + + registered_class->refcount--; + wdp_uninit(wdp, true); + } +} + +/* Returns the name of datapath 'wdp' prefixed with the type + * (for use in log messages). */ +const char * +wdp_name(const struct wdp *wdp) +{ + return wdp->full_name; +} + +/* Returns the name of datapath 'wdp' without the type + * (for use in device names). */ +const char * +wdp_base_name(const struct wdp *wdp) +{ + return wdp->base_name; +} + +/* Enumerates all names that may be used to open 'wdp' into 'all_names'. The + * Linux datapath, for example, supports opening a datapath both by number, + * e.g. "wdp0", and by the name of the datapath's local port. For some + * datapaths, this might be an infinite set (e.g. 
in a file name, slashes may + * be duplicated any number of times), in which case only the names most likely + * to be used will be enumerated. + * + * The caller must already have initialized 'all_names'. Any existing names in + * 'all_names' will not be disturbed. */ +int +wdp_get_all_names(const struct wdp *wdp, struct svec *all_names) +{ + if (wdp->wdp_class->get_all_names) { + int error = wdp->wdp_class->get_all_names(wdp, all_names); + if (error) { + VLOG_WARN_RL(&error_rl, + "failed to retrieve names for datpath %s: %s", + wdp_name(wdp), strerror(error)); + } + return error; + } else { + svec_add(all_names, wdp_base_name(wdp)); + return 0; + } +} + +/* Destroys the datapath that 'wdp' is connected to, first removing all of + * its ports. After calling this function, it does not make sense to pass + * 'wdp' to any functions other than wdp_name() or wdp_close(). */ +int +wdp_delete(struct wdp *wdp) +{ + int error; + + COVERAGE_INC(wdp_destroy); + + error = wdp->wdp_class->destroy(wdp); + log_operation(wdp, "delete", error); + return error; +} + +/* Obtains the set of features supported by 'wdp'. + * + * If successful, returns 0 and stores in '*featuresp' a newly allocated + * "struct ofp_switch_features" that describes the features and ports supported + * by 'wdp'. The caller is responsible for initializing the header, + * datapath_id, and n_buffers members of the returned "struct + * ofp_switch_features". The caller must free the returned buffer (with + * ofpbuf_delete()) when it is no longer needed. + * + * On error, returns an OpenFlow error code (as constructed by ofp_mkerr()) and + * sets '*featuresp' to NULL. */ +int +wdp_get_features(const struct wdp *wdp, struct ofpbuf **featuresp) +{ + int error = wdp->wdp_class->get_features(wdp, featuresp); + if (error) { + *featuresp = NULL; + } + return error; +} + +/* Retrieves statistics for 'wdp' into 'stats'. Returns 0 if successful, + * otherwise a positive errno value. 
On error, clears 'stats' to + * all-bits-zero. */ +int +wdp_get_wdp_stats(const struct wdp *wdp, struct wdp_stats *stats) +{ + int error = wdp->wdp_class->get_stats(wdp, stats); + if (error) { + memset(stats, 0, sizeof *stats); + } + log_operation(wdp, "get_stats", error); + return error; +} + +/* Retrieves the current IP fragment handling policy for 'wdp' into + * '*drop_frags': true indicates that fragments are dropped, false indicates + * that fragments are treated in the same way as other IP packets (except that + * the L4 header cannot be read). Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_get_drop_frags(const struct wdp *wdp, bool *drop_frags) +{ + int error = wdp->wdp_class->get_drop_frags(wdp, drop_frags); + if (error) { + *drop_frags = false; + } + log_operation(wdp, "get_drop_frags", error); + return error; +} + +/* Changes 'wdp''s treatment of IP fragments to 'drop_frags', whose meaning is + * the same as for the get_drop_frags member function. Returns 0 if + * successful, otherwise a positive errno value. EOPNOTSUPP indicates that + * 'wdp''s fragment dropping policy is not configurable. */ +int +wdp_set_drop_frags(struct wdp *wdp, bool drop_frags) +{ + int error; + error = (wdp->wdp_class->set_drop_frags + ? wdp->wdp_class->set_drop_frags(wdp, drop_frags) + : EOPNOTSUPP); + log_operation(wdp, "set_drop_frags", error); + return error; +} + +/* Clears the contents of 'port'. */ +void +wdp_port_clear(struct wdp_port *port) +{ + memset(port, 0, sizeof *port); +} + +/* Makes a deep copy of 'old' in 'port'. The caller may free 'port''s data + * with wdp_port_free(). */ +void +wdp_port_copy(struct wdp_port *port, const struct wdp_port *old) +{ + port->netdev = old->netdev ? netdev_reopen(old->netdev) : NULL; + port->opp = old->opp; + port->devname = old->devname ? xstrdup(old->devname) : NULL; + port->internal = old->internal; +} + +/* Frees the data that 'port' points to (but not 'port' itself). 
*/ +void +wdp_port_free(struct wdp_port *port) +{ + if (port) { + netdev_close(port->netdev); + free(port->devname); + } +} + +/* Frees the data that each of the 'n' ports in 'ports' points to, and then + * frees 'ports' itself. */ +void +wdp_port_array_free(struct wdp_port *ports, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + wdp_port_free(&ports[i]); + } + free(ports); +} + +/* Attempts to add 'devname' as a port on 'wdp': + * + * - If 'internal' is true, attempts to create a new internal port (a virtual + * port implemented in software) by that name. + * + * - If 'internal' is false, 'devname' must name an existing network device. + * + * If successful, returns 0 and sets '*port_nop' to the new port's OpenFlow + * port number (if 'port_nop' is non-null). On failure, returns a positive + * errno value and sets '*port_nop' to OFPP_NONE (if 'port_nop' is non-null). + * + * Some wildcarded datapaths might have fixed sets of ports. For these + * datapaths this function will always fail. + * + * Possible error return values include: + * + * - ENODEV: No device named 'devname' exists (if 'internal' is false). + * + * - EEXIST: A device named 'devname' already exists (if 'internal' is true). + * + * - EINVAL: Device 'devname' is not supported as part of a datapath (e.g. it + * is not an Ethernet device), or 'devname' is too long for a network + * device name (if 'internal' is true) + * + * - EFBIG: The datapath already has as many ports as it can support. + * + * - EOPNOTSUPP: 'wdp' has a fixed set of ports. + */ +int +wdp_port_add(struct wdp *wdp, const char *devname, + bool internal, uint16_t *port_nop) +{ + uint16_t port_no; + int error; + + COVERAGE_INC(wdp_port_add); + + error = (wdp->wdp_class->port_add + ? 
wdp->wdp_class->port_add(wdp, devname, internal, &port_no) + : EOPNOTSUPP); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: added %s as port %"PRIu16, + wdp_name(wdp), devname, port_no); + } else { + VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", + wdp_name(wdp), devname, strerror(error)); + port_no = OFPP_NONE; + } + if (port_nop) { + *port_nop = port_no; + } + return error; +} + +/* Attempts to remove 'wdp''s port numbered 'port_no'. Returns 0 if + * successful, otherwise a positive errno value. + * + * Some wildcarded datapaths might have fixed sets of ports. For these + * datapaths this function will always fail. + * + * Possible error return values include: + * + * - EINVAL: 'port_no' is outside the valid range, or this particular port is + * not removable (e.g. it is the local port). + * + * - ENOENT: 'wdp' currently has no port numbered 'port_no'. + * + * - EOPNOTSUPP: 'wdp' has a fixed set of ports. + */ +int +wdp_port_del(struct wdp *wdp, uint16_t port_no) +{ + int error; + + COVERAGE_INC(wdp_port_del); + + error = (wdp->wdp_class->port_del + ? wdp->wdp_class->port_del(wdp, port_no) + : EOPNOTSUPP); + log_operation(wdp, "port_del", error); + return error; +} + +/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and + * initializes 'port' with port details. On failure, returns a positive errno + * value and clears the contents of 'port' (with wdp_port_clear()). + * + * The caller must not modify or free the returned wdp_port. Calling + * wdp_run() or wdp_port_poll() may free the returned wdp_port. + * + * Possible error return values include: + * + * - EINVAL: 'port_no' is outside the valid range. + * + * - ENOENT: 'wdp' currently has no port numbered 'port_no'. 
+ */ +int +wdp_port_query_by_number(const struct wdp *wdp, uint16_t port_no, + struct wdp_port *port) +{ + int error; + + error = wdp->wdp_class->port_query_by_number(wdp, port_no, port); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: port %"PRIu16" is device %s", + wdp_name(wdp), port_no, port->devname); + } else { + wdp_port_clear(port); + VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s", + wdp_name(wdp), port_no, strerror(error)); + } + return error; +} + +/* Same as wdp_port_query_by_number() except that it looks for a port named + * 'devname' in 'wdp'. + * + * Possible error return values include: + * + * - ENODEV: No device named 'devname' exists. + * + * - ENOENT: 'devname' exists but it is not attached as a port on 'wdp'. + */ +int +wdp_port_query_by_name(const struct wdp *wdp, const char *devname, + struct wdp_port *port) +{ + int error = wdp->wdp_class->port_query_by_name(wdp, devname, port); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: device %s is on port %"PRIu16, + wdp_name(wdp), devname, port->opp.port_no); + } else { + wdp_port_clear(port); + + /* Log level is DBG here because all the current callers are interested + * in whether 'wdp' actually has a port 'devname', so that it's not + * an issue worth logging if it doesn't. */ + VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s", + wdp_name(wdp), devname, strerror(error)); + } + return error; +} + +/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and stores + * a copy of the port's name in '*namep'. On failure, returns a positive errno + * value and stores NULL in '*namep'. + * + * Error return values are the same as for wdp_port_query_by_number(). + * + * The caller is responsible for freeing '*namep' (with free()). 
*/ +int +wdp_port_get_name(struct wdp *wdp, uint16_t port_no, char **namep) +{ + struct wdp_port port; + int error; + + error = wdp_port_query_by_number(wdp, port_no, &port); + *namep = port.devname; + port.devname = NULL; + wdp_port_free(&port); + + return error; +} + +/* Obtains a list of all the ports in 'wdp', in no particular order. + * + * If successful, returns 0 and sets '*portsp' to point to an array of struct + * wdp_port and '*n_portsp' to the number of pointers in the array. On + * failure, returns a positive errno value and sets '*portsp' to NULL and + * '*n_portsp' to 0. + * + * The caller is responsible for freeing '*portsp' and the individual wdp_port + * structures, e.g. with wdp_port_array_free(). */ +int +wdp_port_list(const struct wdp *wdp, + struct wdp_port **portsp, size_t *n_portsp) +{ + int error; + + error = wdp->wdp_class->port_list(wdp, portsp, n_portsp); + if (error) { + *portsp = NULL; + *n_portsp = 0; + } + log_operation(wdp, "port_list", error); + return error; +} + +int +wdp_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config) +{ + return wdp->wdp_class->port_set_config(wdp, port_no, config); +} + +/* Polls for changes in the set of ports in 'wdp'. If the set of ports in + * 'wdp' has changed, this function does one of the following: + * + * - Stores the name of the device that was added to or deleted from 'wdp' in + * '*devnamep' and returns 0. The caller is responsible for freeing + * '*devnamep' (with free()) when it no longer needs it. + * + * - Returns ENOBUFS and sets '*devnamep' to NULL. + * + * This function may also return 'false positives', where it returns 0 and + * '*devnamep' names a device that was not actually added or deleted or it + * returns ENOBUFS without any change. + * + * Returns EAGAIN if the set of ports in 'wdp' has not changed. May also + * return other positive errno values to indicate that something has gone + * wrong. 
*/ +int +wdp_port_poll(const struct wdp *wdp, char **devnamep) +{ + int error = (wdp->wdp_class->port_poll + ? wdp->wdp_class->port_poll(wdp, devnamep) + : EAGAIN); + if (error) { + *devnamep = NULL; + } + return error; +} + +/* Arranges for the poll loop to wake up when port_poll(wdp) will return a + * value other than EAGAIN. */ +void +wdp_port_poll_wait(const struct wdp *wdp) +{ + if (wdp->wdp_class->port_poll_wait) { + wdp->wdp_class->port_poll_wait(wdp); + } +} + +/* Deletes all flows from 'wdp'. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_flow_flush(struct wdp *wdp) +{ + int error; + + COVERAGE_INC(wdp_flow_flush); + + error = wdp->wdp_class->flow_flush(wdp); + log_operation(wdp, "flow_flush", error); + return error; +} + +struct wdp_rule * +wdp_flow_get(struct wdp *wdp, const flow_t *flow) +{ + return wdp->wdp_class->flow_get(wdp, flow); +} + +struct wdp_rule * +wdp_flow_match(struct wdp *wdp, const flow_t *flow) +{ + return wdp->wdp_class->flow_match(wdp, flow); +} + +void +wdp_flow_for_each_match(const struct wdp *wdp, const flow_t *target, + int include, wdp_flow_cb_func *callback, void *aux) +{ + wdp->wdp_class->flow_for_each_match(wdp, target, include, + callback, aux); +} + +int +wdp_flow_get_stats(const struct wdp *wdp, const struct wdp_rule *rule, + struct wdp_flow_stats *stats) +{ + int error = wdp->wdp_class->flow_get_stats(wdp, rule, stats); + if (error) { + memset(stats, 0, sizeof *stats); + } + return error; +} + +bool +wdp_flow_overlaps(const struct wdp *wdp, const flow_t *flow) +{ + return wdp->wdp_class->flow_overlaps(wdp, flow); +} + +int +wdp_flow_put(struct wdp *wdp, struct wdp_flow_put *put, + struct wdp_flow_stats *old_stats, struct wdp_rule **rulep) +{ + int error = wdp->wdp_class->flow_put(wdp, put, old_stats, rulep); + if (error) { + if (old_stats) { + memset(old_stats, 0, sizeof *old_stats); + } + if (rulep) { + *rulep = NULL; + } + } + return error; +} + +int +wdp_flow_delete(struct wdp *wdp, struct 
wdp_rule *rule, + struct wdp_flow_stats *final_stats) +{ + int error = wdp->wdp_class->flow_delete(wdp, rule, final_stats); + if (error && final_stats) { + memset(final_stats, 0, sizeof *final_stats); + } + return error; +} + +int +wdp_flow_inject(struct wdp *wdp, struct wdp_rule *rule, + uint16_t in_port, const struct ofpbuf *packet) +{ + return wdp->wdp_class->flow_inject(wdp, rule, in_port, packet); +} + +int +wdp_execute(struct wdp *wdp, uint16_t in_port, + const union ofp_action actions[], size_t n_actions, + const struct ofpbuf *buf) +{ + int error; + + COVERAGE_INC(wdp_execute); + if (n_actions > 0) { + error = wdp->wdp_class->execute(wdp, in_port, actions, + n_actions, buf); + } else { + error = 0; + } + return error; +} + +/* Retrieves 'wdp''s "listen mask" into '*listen_mask'. Each bit set in + * '*listen_mask' indicates that wdp_recv() will receive messages of the + * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_recv_get_mask(const struct wdp *wdp, int *listen_mask) +{ + int error = wdp->wdp_class->recv_get_mask(wdp, listen_mask); + if (error) { + *listen_mask = 0; + } + log_operation(wdp, "recv_get_mask", error); + return error; +} + +/* Sets 'wdp''s "listen mask" to 'listen_mask'. Each bit set in + * '*listen_mask' requests that wdp_recv() receive messages of the + * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_recv_set_mask(struct wdp *wdp, int listen_mask) +{ + int error = wdp->wdp_class->recv_set_mask(wdp, listen_mask); + log_operation(wdp, "recv_set_mask", error); + return error; +} + +/* Retrieve the sFlow sampling probability. '*probability' is expressed as the + * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is + * the probability of sampling a given packet. + * + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP + * indicates that 'wdp' does not support sFlow sampling. 
*/ +int +wdp_get_sflow_probability(const struct wdp *wdp, uint32_t *probability) +{ + int error = (wdp->wdp_class->get_sflow_probability + ? wdp->wdp_class->get_sflow_probability(wdp, probability) + : EOPNOTSUPP); + if (error) { + *probability = 0; + } + log_operation(wdp, "get_sflow_probability", error); + return error; +} + +/* Set the sFlow sampling probability. 'probability' is expressed as the + * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is + * the probability of sampling a given packet. + * + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP + * indicates that 'wdp' does not support sFlow sampling. */ +int +wdp_set_sflow_probability(struct wdp *wdp, uint32_t probability) +{ + int error = (wdp->wdp_class->set_sflow_probability + ? wdp->wdp_class->set_sflow_probability(wdp, probability) + : EOPNOTSUPP); + log_operation(wdp, "set_sflow_probability", error); + return error; +} + +/* Attempts to receive a message from 'wdp'. If successful, stores the + * message into '*packet'. Only messages of the types selected with + * wdp_recv_set_mask() will ordinarily be received (but if a message type + * is enabled and then later disabled, some stragglers might pop up). + * + * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN + * if no message is immediately available. On failure, '*packet' is zeroed + * and its 'channel' member is set to -1. */ +int +wdp_recv(struct wdp *wdp, struct wdp_packet *packet) +{ + int error = wdp->wdp_class->recv(wdp, packet); + if (!error) { + /* XXX vlog_dbg received packet */ + } else { + memset(packet, 0, sizeof *packet); + packet->channel = -1; + } + return error; +} + +/* Discards all messages that would otherwise be received by wdp_recv() on + * 'wdp'. Returns 0 if successful, otherwise a positive errno value. 
*/ +int +wdp_recv_purge(struct wdp *wdp) +{ + struct wdp_stats stats; + unsigned int i; + int error; + + COVERAGE_INC(wdp_purge); + + error = wdp_get_wdp_stats(wdp, &stats); + if (error) { + return error; + } + + for (i = 0; i < stats.max_miss_queue + stats.max_action_queue + stats.max_sflow_queue; i++) { + struct wdp_packet packet; + + error = wdp_recv(wdp, &packet); + if (error) { + return error == EAGAIN ? 0 : error; + } + ofpbuf_delete(packet.payload); + } + return 0; +} + +/* Arranges for the poll loop to wake up when 'wdp' has a message queued to be + * received with wdp_recv(). */ +void +wdp_recv_wait(struct wdp *wdp) +{ + wdp->wdp_class->recv_wait(wdp); +} + +/* Obtains the NetFlow engine type and engine ID for 'wdp' into '*engine_type' + * and '*engine_id', respectively. */ +void +wdp_get_netflow_ids(const struct wdp *wdp, + uint8_t *engine_type, uint8_t *engine_id) +{ + *engine_type = wdp->netflow_engine_type; + *engine_id = wdp->netflow_engine_id; +} + ++/* Returns a copy of 'old'. The packet's payload, if any, is copied as well, ++ * but if it is longer than 'trim' bytes it is truncated to that length. */ ++struct wdp_packet * ++wdp_packet_clone(const struct wdp_packet *old, size_t trim) ++{ ++ struct wdp_packet *new = xmemdup(old, sizeof *old); ++ if (old->payload) { ++ new->payload = ofpbuf_clone_data(old->payload->data, ++ MIN(trim, old->payload->size)); ++ } ++ return new; ++} ++ +void +wdp_packet_destroy(struct wdp_packet *packet) +{ + if (packet) { + ofpbuf_delete(packet->payload); + free(packet); + } +} + +void +wdp_init(struct wdp *wdp, const struct wdp_class *wdp_class, + const char *name, + uint8_t netflow_engine_type, uint8_t netflow_engine_id) +{ + wdp->wdp_class = wdp_class; + wdp->base_name = xstrdup(name); + wdp->full_name = xasprintf("%s@%s", wdp_class->type, name); + wdp->netflow_engine_type = netflow_engine_type; + wdp->netflow_engine_id = netflow_engine_id; +} + +/* Undoes the results of initialization. 
+ * + * Normally this function only needs to be called from wdp_close(). + * However, it may be called by providers due to an error on opening + * that occurs after initialization. In this case wdp_close() would + * never be called. */ +void +wdp_uninit(struct wdp *wdp, bool close) +{ + char *base_name = wdp->base_name; + char *full_name = wdp->full_name; + + if (close) { + wdp->wdp_class->close(wdp); + } + + free(base_name); + free(full_name); +} + +static void +log_operation(const struct wdp *wdp, const char *operation, int error) +{ + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: %s success", wdp_name(wdp), operation); + } else { + VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)", + wdp_name(wdp), operation, strerror(error)); + } +} diff --cc ofproto/wdp.h index ac9c01076,000000000..0b90bd43c mode 100644,000000..100644 --- a/ofproto/wdp.h +++ b/ofproto/wdp.h @@@ -1,230 -1,0 +1,239 @@@ +/* + * Copyright (c) 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef WDP_H +#define WDP_H 1 + +#include "classifier.h" +#include "list.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct ofpbuf; +struct svec; +struct wdp; +struct wdp_class; +union ofp_action; + +struct wdp_table_stats { + /* Flows. */ + unsigned int n_flows; /* Number of flows in table. */ + unsigned int cur_capacity; /* Current flow table capacity. */ + unsigned int max_capacity; /* Maximum expansion of flow table capacity. */ + + /* Lookups. 
*/ + unsigned long long int n_hit; /* Number of flow table matches. */ + unsigned long long int n_missed; /* Number of flow table misses. */ + unsigned long long int n_lost; /* Misses dropped due to buffer limits. */ +}; + +struct wdp_stats { + struct wdp_table_stats exact; + struct wdp_table_stats wild; + + /* Ports. */ + unsigned int n_ports; /* Current number of ports. */ + unsigned int max_ports; /* Maximum supported number of ports. */ + + /* Lookups. */ + unsigned long long int n_frags; /* Number of dropped IP fragments. */ + + /* Queues. */ + unsigned int max_miss_queue; /* Max length of WDP_CHAN_MISS queue. */ + unsigned int max_action_queue; /* Max length of WDP_CHAN_ACTION queue. */ + unsigned int max_sflow_queue; /* Max length of WDP_CHAN_SFLOW queue. */ +}; + +struct wdp_rule { + struct cls_rule cr; + - union ofp_action *actions; /* OpenFlow actions. */ - int n_actions; /* Number of elements in 'actions' array. */ + long long int created; /* Time created, in ms since the epoch. */ + uint16_t idle_timeout; /* In seconds from time of last use. */ + uint16_t hard_timeout; /* In seconds from time of creation. */ + ++ /* OpenFlow actions. ++ * ++ * 'n_actions' is the number of elements in the 'actions' array. A single ++ * action may take up more than one element's worth of space. ++ * ++ * A subrule has no actions (it uses the super-rule's actions). */ ++ union ofp_action *actions; /* OpenFlow actions. */ ++ int n_actions; /* Number of elements in 'actions' array. 
*/ ++ + void *client_data; +}; + +void wdp_rule_init(struct wdp_rule *, const union ofp_action *actions, + size_t n_actions); +void wdp_rule_uninit(struct wdp_rule *); + +void wdp_run(void); +void wdp_wait(void); + +int wdp_register_provider(const struct wdp_class *); +int wdp_unregister_provider(const char *type); +void wdp_enumerate_types(struct svec *types); + +int wdp_enumerate_names(const char *type, struct svec *names); +void wdp_parse_name(const char *datapath_name, char **name, char **type); + +void wdp_run_expiration(struct wdp *); +void wdp_run_revalidation(struct wdp *, bool revalidate_all); + +int wdp_open(const char *name, const char *type, struct wdp **); +int wdp_create(const char *name, const char *type, struct wdp **); +int wdp_create_and_open(const char *name, const char *type, struct wdp **); +void wdp_close(struct wdp *); + +const char *wdp_name(const struct wdp *); +const char *wdp_base_name(const struct wdp *); +int wdp_get_all_names(const struct wdp *, struct svec *); + +int wdp_delete(struct wdp *); + +int wdp_get_features(const struct wdp *, struct ofpbuf **featuresp); +int wdp_get_wdp_stats(const struct wdp *, struct wdp_stats *); + +int wdp_get_drop_frags(const struct wdp *, bool *drop_frags); +int wdp_set_drop_frags(struct wdp *, bool drop_frags); + +struct wdp_port { + struct netdev *netdev; + struct ofp_phy_port opp; /* In *host* byte order. */ + char *devname; /* Network device name. 
*/ + bool internal; +}; +void wdp_port_clear(struct wdp_port *); +void wdp_port_copy(struct wdp_port *, const struct wdp_port *); +void wdp_port_free(struct wdp_port *); +void wdp_port_array_free(struct wdp_port *, size_t n); + +int wdp_port_add(struct wdp *, const char *devname, bool internal, + uint16_t *port_no); +int wdp_port_del(struct wdp *, uint16_t port_no); +int wdp_port_query_by_number(const struct wdp *, uint16_t port_no, + struct wdp_port *); +int wdp_port_query_by_name(const struct wdp *, const char *devname, + struct wdp_port *); +int wdp_port_get_name(struct wdp *, uint16_t port_no, char **namep); +int wdp_port_list(const struct wdp *, struct wdp_port **, size_t *n_ports); + +int wdp_port_set_config(struct wdp *, uint16_t port_no, uint32_t config); + +int wdp_port_poll(const struct wdp *, char **devnamep); +void wdp_port_poll_wait(const struct wdp *); + +int wdp_flow_flush(struct wdp *); + +struct wdp_flow_stats { + unsigned long long int n_packets; /* Number of matched packets. */ + unsigned long long int n_bytes; /* Number of matched bytes. */ + long long int inserted; /* Time inserted into flow table. */ + long long int used; /* Time last used. */ + uint8_t tcp_flags; /* Bitwise-OR of TCP flags seen. */ + uint8_t ip_tos; /* IP TOS for most recent packet. */ +}; + +/* Finding and inspecting flows. */ +struct wdp_rule *wdp_flow_get(struct wdp *, const flow_t *); +struct wdp_rule *wdp_flow_match(struct wdp *, const flow_t *); + +typedef void wdp_flow_cb_func(struct wdp_rule *, void *aux); +void wdp_flow_for_each_match(const struct wdp *, const flow_t *, + int include, wdp_flow_cb_func *, void *aux); + +int wdp_flow_get_stats(const struct wdp *, const struct wdp_rule *, + struct wdp_flow_stats *); +bool wdp_flow_overlaps(const struct wdp *, const flow_t *); + +/* Modifying flows. */ +enum wdp_flow_put_flags { + /* At least one of these flags should be set. */ + WDP_PUT_CREATE = 1 << 0, /* Allow creating a new flow. 
*/ + WDP_PUT_MODIFY = 1 << 1, /* Allow modifying an existing flow. */ + + /* Options used only for modifying existing flows. */ + WDP_PUT_COUNTERS = 1 << 2, /* Clear counters, TCP flags, IP TOS, used. */ + WDP_PUT_ACTIONS = 1 << 3, /* Update actions. */ + WDP_PUT_INSERTED = 1 << 4, /* Update 'inserted' to current time. */ + WDP_PUT_TIMEOUTS = 1 << 5, /* Update 'idle_timeout' and 'hard_timeout'. */ + WDP_PUT_ALL = (WDP_PUT_COUNTERS | WDP_PUT_ACTIONS + | WDP_PUT_INSERTED | WDP_PUT_TIMEOUTS) +}; + +struct wdp_flow_put { + enum wdp_flow_put_flags flags; + + const flow_t *flow; + + const union ofp_action *actions; + size_t n_actions; + + unsigned short int idle_timeout; + unsigned short int hard_timeout; +}; + +int wdp_flow_put(struct wdp *, struct wdp_flow_put *, + struct wdp_flow_stats *old_stats, + struct wdp_rule **rulep); +int wdp_flow_delete(struct wdp *, struct wdp_rule *, + struct wdp_flow_stats *final_stats); + +/* Sending packets in flows. */ +int wdp_flow_inject(struct wdp *, struct wdp_rule *, + uint16_t in_port, const struct ofpbuf *); +int wdp_execute(struct wdp *, uint16_t in_port, + const union ofp_action[], size_t n_actions, + const struct ofpbuf *); + +/* Receiving packets that miss the flow table. */ +enum wdp_channel { + WDP_CHAN_MISS, /* Packet missed in flow table. */ + WDP_CHAN_ACTION, /* Packet output to OFPP_CONTROLLER. */ + WDP_CHAN_SFLOW, /* sFlow samples. 
*/ + WDP_N_CHANS +}; + +struct wdp_packet { + struct list list; + enum wdp_channel channel; ++ uint32_t tun_id; + uint16_t in_port; + int send_len; + struct ofpbuf *payload; +}; + ++struct wdp_packet *wdp_packet_clone(const struct wdp_packet *, size_t); +void wdp_packet_destroy(struct wdp_packet *); + +int wdp_recv_get_mask(const struct wdp *, int *listen_mask); +int wdp_recv_set_mask(struct wdp *, int listen_mask); +int wdp_get_sflow_probability(const struct wdp *, uint32_t *probability); +int wdp_set_sflow_probability(struct wdp *, uint32_t probability); +int wdp_recv(struct wdp *, struct wdp_packet *); +int wdp_recv_purge(struct wdp *); +void wdp_recv_wait(struct wdp *); + +void wdp_get_netflow_ids(const struct wdp *, + uint8_t *engine_type, uint8_t *engine_id); + +#ifdef __cplusplus +} +#endif + +#endif /* wdp.h */ diff --cc tests/test-classifier.c index ec9e47556,c831559ba..b9032cf15 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@@ -462,7 -469,8 +469,7 @@@ make_rule(int wc_fields, unsigned int p } rule = xzalloc(sizeof *rule); - cls_rule_from_flow(&rule->cls_rule, &flow); - cls_rule_from_flow(&flow, wildcards, !wildcards ? 
UINT_MAX : priority, - &rule->cls_rule); ++ cls_rule_from_flow(&flow, &rule->cls_rule); return rule; } diff --cc tests/test-flows.c index 5bb990033,424dd7b01..a9d1f0ad7 --- a/tests/test-flows.c +++ b/tests/test-flows.c @@@ -67,8 -67,8 +67,8 @@@ main(int argc OVS_UNUSED, char *argv[] ovs_fatal(retval, "error reading pcap file"); } - flow_extract(packet, 1, &flow); - flow_to_match(&flow, &extracted_match); + flow_extract(packet, 0, 1, &flow); - flow_to_match(&flow, 0, false, &extracted_match); ++ flow_to_match(&flow, false, &extracted_match); if (memcmp(&expected_match, &extracted_match, sizeof expected_match)) { char *exp_s = ofp_match_to_string(&expected_match, 2); diff --cc utilities/ovs-dpctl.c index a696ed9b2,ecfb3069a..c4937a309 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@@ -473,14 -473,14 +473,15 @@@ do_dump_flows(int argc OVS_UNUSED, cha f->actions = actions; f->n_actions = MAX_ACTIONS; - xfif_flow_get(xfif, f); - if (!dpif_flow_get(dpif, f)) { ++ if (!xfif_flow_get(xfif, f)) { + - ds_clear(&ds); - format_xflow_flow(&ds, f); - printf("%s\n", ds_cstr(&ds)); + ds_clear(&ds); - format_odp_flow(&ds, f); ++ format_xflow_flow(&ds, f); + printf("%s\n", ds_cstr(&ds)); + } } ds_destroy(&ds); - dpif_close(dpif); + xfif_close(xfif); } static void diff --cc utilities/ovs-ofctl.c index 6f90fbcad,1a0c9363e..fbccdcb63 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@@ -214,14 -214,15 +214,15 @@@ open_vconn_socket(const char *name, str } static void - open_vconn(const char *name, struct vconn **vconnp) + open_vconn__(const char *name, const char *default_suffix, + struct vconn **vconnp) { - struct dpif *dpif; + struct xfif *xfif; struct stat s; char *bridge_path, *datapath_name, *datapath_type; - bridge_path = xasprintf("%s/%s.mgmt", ovs_rundir, name); + bridge_path = xasprintf("%s/%s.%s", ovs_rundir, name, default_suffix); - dp_parse_name(name, &datapath_name, &datapath_type); + xf_parse_name(name, &datapath_name, &datapath_type); if (strstr(name, 
":")) { run(vconn_open_block(name, OFP_VERSION, vconnp), @@@ -230,18 -231,19 +231,19 @@@ open_vconn_socket(name, vconnp); } else if (!stat(bridge_path, &s) && S_ISSOCK(s.st_mode)) { open_vconn_socket(bridge_path, vconnp); - } else if (!dpif_open(datapath_name, datapath_type, &dpif)) { - char dpif_name[IF_NAMESIZE + 1]; + } else if (!xfif_open(datapath_name, datapath_type, &xfif)) { + char xfif_name[IF_NAMESIZE + 1]; char *socket_name; - run(dpif_port_get_name(dpif, ODPP_LOCAL, dpif_name, sizeof dpif_name), - "obtaining name of %s", dpif_name); - dpif_close(dpif); - if (strcmp(dpif_name, name)) { - VLOG_INFO("datapath %s is named %s", name, dpif_name); + run(xfif_port_get_name(xfif, XFLOWP_LOCAL, xfif_name, sizeof xfif_name), + "obtaining name of %s", xfif_name); + xfif_close(xfif); + if (strcmp(xfif_name, name)) { + VLOG_INFO("datapath %s is named %s", name, xfif_name); } - socket_name = xasprintf("%s/%s.mgmt", ovs_rundir, xfif_name); + socket_name = xasprintf("%s/%s.%s", - ovs_rundir, dpif_name, default_suffix); ++ ovs_rundir, xfif_name, default_suffix); if (stat(socket_name, &s)) { ovs_fatal(errno, "cannot connect to %s: stat failed on %s", name, socket_name); diff --cc utilities/ovs-openflowd.8.in index 750e02e71,e50a4a7ab..c9fd2abfd --- a/utilities/ovs-openflowd.8.in +++ b/utilities/ovs-openflowd.8.in @@@ -16,10 -16,10 +16,10 @@@ OpenFlow controllers over TCP or SSL The mandatory \fIdatapath\fR argument argument specifies the local datapath to relay. It takes one of the following forms: . -.so lib/dpif.man +.so lib/xfif.man . .PP - The optional \fIcontroller\fR argument specifies how to connect to + The optional \fIcontroller\fR arguments specify how to connect to the OpenFlow controller. It takes one of the following forms: . 
.so lib/vconn-active.man diff --cc utilities/ovs-openflowd.c index f4c831bc3,e84a3999c..8e5b4d5ef --- a/utilities/ovs-openflowd.c +++ b/utilities/ovs-openflowd.c @@@ -507,15 -468,40 +469,40 @@@ parse_options(int argc, char *argv[], s = stream_ssl_is_configured() ? "^ssl:.*" : "^tcp:.*"; } - /* Mode of operation. */ - s->discovery = s->controller_name == NULL; - if (s->discovery && !s->in_band) { - ovs_fatal(0, "Cannot perform discovery with out-of-band control"); + /* Rate limiting. */ + if (controller_opts.rate_limit && controller_opts.rate_limit < 100) { + VLOG_WARN("Rate limit set to unusually low value %d", + controller_opts.rate_limit); } - /* Rate limiting. */ - if (s->rate_limit && s->rate_limit < 100) { - VLOG_WARN("Rate limit set to unusually low value %d", s->rate_limit); + /* Local vconns. */ - dp_parse_name(argv[0], &s->dp_name, &s->dp_type); ++ xf_parse_name(argv[0], &s->dp_name, &s->dp_type); + + /* Controllers. */ + s->n_controllers = argc > 1 ? argc - 1 : 1; + s->controllers = xmalloc(s->n_controllers * sizeof *s->controllers); + if (argc > 1) { + size_t i; + + for (i = 0; i < s->n_controllers; i++) { + s->controllers[i] = controller_opts; + s->controllers[i].target = argv[i + 1]; + } + } else { + s->controllers[0] = controller_opts; + s->controllers[0].target = "discover"; + } + + /* Sanity check. 
*/ + if (controller_opts.band == OFPROTO_OUT_OF_BAND) { + size_t i; + + for (i = 0; i < s->n_controllers; i++) { + if (!strcmp(s->controllers[i].target, "discover")) { + ovs_fatal(0, "Cannot perform discovery with out-of-band " + "control"); + } + } } } diff --cc vswitchd/bridge.c index fcba05b05,354d4d894..373e41ca7 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@@ -337,8 -341,9 +341,9 @@@ bridge_init(const struct ovsrec_open_vs } } } + svec_destroy(&bridge_names); - svec_destroy(&dpif_names); - svec_destroy(&dpif_types); + svec_destroy(&xfif_names); + svec_destroy(&xfif_types); unixctl_command_register("bridge/dump-flows", bridge_unixctl_dump_flows, NULL); @@@ -1165,16 -1232,17 +1233,17 @@@ bridge_destroy(struct bridge *br port_destroy(br->ports[br->n_ports - 1]); } list_remove(&br->node); - error = dpif_delete(br->dpif); + error = xfif_delete(br->xfif); if (error && error != ENOENT) { VLOG_ERR("failed to delete %s: %s", - dpif_name(br->dpif), strerror(error)); + xfif_name(br->xfif), strerror(error)); } - dpif_close(br->dpif); + xfif_close(br->xfif); ofproto_destroy(br->ofproto); - free(br->controller); mac_learning_destroy(br->ml); port_array_destroy(&br->ifaces); + shash_destroy(&br->port_by_name); + shash_destroy(&br->iface_by_name); free(br->ports); free(br->name); free(br); @@@ -1581,16 -1595,78 +1596,79 @@@ bridge_reconfigure_remotes(const struc action.output.len = htons(sizeof action); action.output.port = htons(OFPP_NORMAL); memset(&flow, 0, sizeof flow); - flow.wildcards = OFPFW_ALL; - ofproto_add_flow(br->ofproto, &flow, OVSFW_ALL, 0, &action, 1, 0); ++ flow.wildcards = OVSFW_ALL; + ofproto_add_flow(br->ofproto, &flow, &action, 1, 0); + } else { + struct ofproto_controller *ocs; + size_t i; - ofproto_set_in_band(br->ofproto, false); - ofproto_set_max_backoff(br->ofproto, 1); - ofproto_set_probe_interval(br->ofproto, 5); - ofproto_set_failure(br->ofproto, false); - } + ocs = xmalloc(n_controllers * sizeof *ocs); + for (i = 0; i < n_controllers; i++) { 
+ struct ovsrec_controller *c = controllers[i]; + struct ofproto_controller *oc = &ocs[i]; + + if (strcmp(c->target, "discover")) { + struct iface *local_iface; + struct in_addr ip; + + local_iface = bridge_get_local_iface(br); + if (local_iface && c->local_ip + && inet_aton(c->local_ip, &ip)) { + struct netdev *netdev = local_iface->netdev; + struct in_addr mask, gateway; + + if (!c->local_netmask + || !inet_aton(c->local_netmask, &mask)) { + mask.s_addr = 0; + } + if (!c->local_gateway + || !inet_aton(c->local_gateway, &gateway)) { + gateway.s_addr = 0; + } + + netdev_turn_flags_on(netdev, NETDEV_UP, true); + if (!mask.s_addr) { + mask.s_addr = guess_netmask(ip.s_addr); + } + if (!netdev_set_in4(netdev, ip, mask)) { + VLOG_INFO("bridge %s: configured IP address "IP_FMT", " + "netmask "IP_FMT, + br->name, IP_ARGS(&ip.s_addr), + IP_ARGS(&mask.s_addr)); + } + + if (gateway.s_addr) { + if (!netdev_add_router(netdev, gateway)) { + VLOG_INFO("bridge %s: configured gateway "IP_FMT, + br->name, IP_ARGS(&gateway.s_addr)); + } + } + } + } - ofproto_set_controller(br->ofproto, br->controller); + oc->target = c->target; + oc->max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8; + oc->probe_interval = (c->inactivity_probe + ? *c->inactivity_probe / 1000 : 5); + oc->fail = (!c->fail_mode + || !strcmp(c->fail_mode, "standalone") + || !strcmp(c->fail_mode, "open") + ? OFPROTO_FAIL_STANDALONE + : OFPROTO_FAIL_SECURE); + oc->band = (!c->connection_mode + || !strcmp(c->connection_mode, "in-band") + ? OFPROTO_IN_BAND + : OFPROTO_OUT_OF_BAND); + oc->accept_re = c->discover_accept_regex; + oc->update_resolv_conf = c->discover_update_resolv_conf; + oc->rate_limit = (c->controller_rate_limit + ? *c->controller_rate_limit : 0); + oc->burst_limit = (c->controller_burst_limit + ? 
*c->controller_burst_limit : 0); + } + ofproto_set_controllers(br->ofproto, ocs, n_controllers); + free(ocs); + } } static void @@@ -2173,15 -2302,13 +2305,13 @@@ is_admissible(struct bridge *br, const { struct iface *in_iface; struct port *in_port; - struct port *out_port = NULL; /* By default, drop the packet/flow. */ int vlan; - int out_port_idx; /* Find the interface and port structure for the received packet. */ - in_iface = iface_from_dp_ifidx(br, flow->in_port); + in_iface = iface_from_xf_ifidx(br, flow->in_port); if (!in_iface) { /* No interface? Something fishy... */ - if (packet != NULL) { + if (have_packet) { /* Odd. A few possible reasons here: * * - We deleted an interface but there are still a few packets @@@ -2245,8 -2374,28 +2377,28 @@@ } } - /* MAC learning. */ - out_port = FLOOD_PORT; + return true; + } + + /* If the composed actions may be applied to any packet in the given 'flow', + * returns true. Otherwise, the actions should only be applied to 'packet', or + * not at all, if 'packet' was NULL. */ + static bool + process_flow(struct bridge *br, const flow_t *flow, - const struct ofpbuf *packet, struct odp_actions *actions, ++ const struct ofpbuf *packet, struct xflow_actions *actions, + tag_type *tags, uint16_t *nf_output_iface) + { + struct port *in_port; + struct port *out_port; + int vlan; + int out_port_idx; + + /* Check whether we should drop packets in this flow. */ + if (!is_admissible(br, flow, packet != NULL, tags, &vlan, &in_port)) { + out_port = NULL; + goto done; + } + /* Learn source MAC (but don't try to learn from revalidation). 
*/ if (packet) { update_learning_table(br, flow, vlan, in_port); @@@ -2333,8 -2485,10 +2488,10 @@@ bridge_account_flow_ofhook_cb(const flo void *br_) { struct bridge *br = br_; - struct port *in_port; - const union odp_action *a; + const union xflow_action *a; + struct port *in_port; + tag_type tags = 0; + int vlan; /* Feed information from the active flows back into the learning table * to ensure that table is always in sync with what is actually flowing @@@ -2738,7 -2884,7 +2887,7 @@@ bond_send_learning_packets(struct port n_packets++; compose_benign_packet(&packet, "Open vSwitch Bond Failover", 0xf177, e->mac); - flow_extract(&packet, XFLOWP_NONE, &flow); - flow_extract(&packet, 0, ODPP_NONE, &flow); ++ flow_extract(&packet, 0, XFLOWP_NONE, &flow); retval = ofproto_send_packet(br->ofproto, &flow, actions, a - actions, &packet); if (retval) { @@@ -3471,8 -3654,10 +3657,10 @@@ iface_destroy(struct iface *iface bool del_active = port->active_iface == iface->port_ifidx; struct iface *del; + shash_find_and_delete_assert(&br->iface_by_name, iface->name); + - if (iface->dp_ifidx >= 0) { - port_array_set(&br->ifaces, iface->dp_ifidx, NULL); + if (iface->xf_ifidx >= 0) { + port_array_set(&br->ifaces, iface->xf_ifidx, NULL); } del = port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces];