From: Ben Pfaff Date: Thu, 22 Jul 2010 19:31:55 +0000 (-0700) Subject: Merge "master" into "wdp". X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=966b5d43f2bc060dca3c19767db0a3dcd1ebb799;p=sliver-openvswitch.git Merge "master" into "wdp". --- 966b5d43f2bc060dca3c19767db0a3dcd1ebb799 diff --cc datapath/actions.c index ca9234c45,1a6cc3570..d8caeb994 --- a/datapath/actions.c +++ b/datapath/actions.c @@@ -21,11 -21,10 +21,10 @@@ #include "actions.h" #include "datapath.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "vport.h" - static struct sk_buff * - make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) + static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) { if (skb_shared(skb) || skb_cloned(skb)) { struct sk_buff *nskb; @@@ -77,17 -69,21 +69,13 @@@ static struct sk_buff *vlan_pull_tag(st return skb; } - - static struct sk_buff * - modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, - struct xflow_key *key, const union xflow_action *a, - int n_actions, gfp_t gfp) + static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, - const struct odp_flow_key *key, - const union odp_action *a, int n_actions, - gfp_t gfp) ++ struct xflow_key *key, const union xflow_action *a, ++ int n_actions, gfp_t gfp) { - u16 tci, mask; - - if (a->type == ODPAT_SET_VLAN_VID) { - tci = ntohs(a->vlan_vid.vlan_vid); - mask = VLAN_VID_MASK; - } else { - tci = a->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT; - mask = VLAN_PCP_MASK; - } + __be16 mask = a->dl_tci.mask; + __be16 tci = a->dl_tci.tci; - key->dl_tci = (key->dl_tci & ~(mask | VLAN_TAG_PRESENT)) | tci; - skb = make_writable(skb, VLAN_HLEN, gfp); if (!skb) return ERR_PTR(-ENOMEM); @@@ -153,12 -149,11 +141,11 @@@ /* GSO can change the checksum type so update.*/ compute_ip_summed(segs, true); - segs = __vlan_put_tag(segs, tci); + segs = __vlan_put_tag(segs, ntohs(tci)); err = -ENOMEM; if (segs) { - struct xflow_key segkey 
= *key; err = execute_actions(dp, segs, - &segkey, a + 1, + key, a + 1, n_actions - 1, gfp); } @@@ -210,20 -203,16 +195,16 @@@ static struct sk_buff *strip_vlan(struc } static struct sk_buff *set_dl_addr(struct sk_buff *skb, - struct xflow_key *key, - const struct odp_action_dl_addr *a, + const struct xflow_action_dl_addr *a, gfp_t gfp) { skb = make_writable(skb, 0, gfp); if (skb) { struct ethhdr *eh = eth_hdr(skb); - if (a->type == XFLOWAT_SET_DL_SRC) { - if (a->type == ODPAT_SET_DL_SRC) ++ if (a->type == XFLOWAT_SET_DL_SRC) memcpy(eh->h_source, a->dl_addr, ETH_ALEN); - memcpy(key->dl_src, a->dl_addr, ETH_ALEN); - } else { + else memcpy(eh->h_dest, a->dl_addr, ETH_ALEN); - memcpy(key->dl_dst, a->dl_addr, ETH_ALEN); - } } return skb; } @@@ -249,8 -238,8 +230,8 @@@ static void update_csum(__sum16 *sum, s } static struct sk_buff *set_nw_addr(struct sk_buff *skb, - struct xflow_key *key, - const struct odp_flow_key *key, - const struct odp_action_nw_addr *a, ++ const struct xflow_key *key, + const struct xflow_action_nw_addr *a, gfp_t gfp) { if (key->dl_type != htons(ETH_P_IP)) @@@ -282,8 -266,8 +258,8 @@@ } static struct sk_buff *set_nw_tos(struct sk_buff *skb, - struct xflow_key *key, - const struct odp_flow_key *key, - const struct odp_action_nw_tos *a, ++ const struct xflow_key *key, + const struct xflow_action_nw_tos *a, gfp_t gfp) { if (key->dl_type != htons(ETH_P_IP)) @@@ -306,10 -289,9 +281,10 @@@ return skb; } - static struct sk_buff * - set_tp_port(struct sk_buff *skb, struct xflow_key *key, - const struct xflow_action_tp_port *a, - gfp_t gfp) + static struct sk_buff *set_tp_port(struct sk_buff *skb, - const struct odp_flow_key *key, - const struct odp_action_tp_port *a, gfp_t gfp) ++ const struct xflow_key *key, ++ const struct xflow_action_tp_port *a, ++ gfp_t gfp) { int check_ofs; @@@ -434,8 -395,8 +388,8 @@@ static void sflow_sample(struct datapat /* Execute a list of actions against 'skb'. 
*/ int execute_actions(struct datapath *dp, struct sk_buff *skb, - struct xflow_key *key, - const struct odp_flow_key *key, - const union odp_action *a, int n_actions, ++ const struct xflow_key *key, + const union xflow_action *a, int n_actions, gfp_t gfp) { /* Every output action needs a separate clone of 'skb', but the common @@@ -483,27 -444,28 +437,27 @@@ } break; - case ODPAT_SET_TUNNEL: + case XFLOWAT_SET_TUNNEL: - set_tunnel(skb, key, a->tunnel.tun_id); + OVS_CB(skb)->tun_id = a->tunnel.tun_id; break; - case ODPAT_SET_VLAN_VID: - case ODPAT_SET_VLAN_PCP: + case XFLOWAT_SET_DL_TCI: skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp); if (IS_ERR(skb)) return PTR_ERR(skb); break; - case ODPAT_STRIP_VLAN: + case XFLOWAT_STRIP_VLAN: - skb = strip_vlan(skb, key, gfp); + skb = strip_vlan(skb, gfp); break; - case ODPAT_SET_DL_SRC: - case ODPAT_SET_DL_DST: + case XFLOWAT_SET_DL_SRC: + case XFLOWAT_SET_DL_DST: - skb = set_dl_addr(skb, key, &a->dl_addr, gfp); + skb = set_dl_addr(skb, &a->dl_addr, gfp); break; - case ODPAT_SET_NW_SRC: - case ODPAT_SET_NW_DST: + case XFLOWAT_SET_NW_SRC: + case XFLOWAT_SET_NW_DST: skb = set_nw_addr(skb, key, &a->nw_addr, gfp); break; diff --cc datapath/actions.h index a1114649a,e4fc39749..7351f6b2d --- a/datapath/actions.h +++ b/datapath/actions.h @@@ -14,16 -14,16 +14,16 @@@ struct datapath; struct sk_buff; -struct odp_flow_key; -union odp_action; +struct xflow_key; +union xflow_action; int execute_actions(struct datapath *dp, struct sk_buff *skb, - struct xflow_key *key, - const struct odp_flow_key *key, - const union odp_action *, int n_actions, ++ const struct xflow_key *key, + const union xflow_action *, int n_actions, gfp_t gfp); - static inline void - set_skb_csum_bits(const struct sk_buff *old_skb, struct sk_buff *new_skb) + static inline void set_skb_csum_bits(const struct sk_buff *old_skb, + struct sk_buff *new_skb) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) /* Before 2.6.24 these fields were not copied when diff --cc 
datapath/datapath.c index 8747a0551,eb260e33a..d8149029d --- a/datapath/datapath.c +++ b/datapath/datapath.c @@@ -1376,10 -1375,9 +1372,9 @@@ void set_internal_devs_mtu(const struc } } - static int - put_port(const struct dp_port *p, struct xflow_port __user *uop) -static int put_port(const struct dp_port *p, struct odp_port __user *uop) ++static int put_port(const struct dp_port *p, struct xflow_port __user *uop) { - struct odp_port op; + struct xflow_port op; memset(&op, 0, sizeof op); @@@ -1393,10 -1391,9 +1388,9 @@@ return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0; } - static int - query_port(struct datapath *dp, struct xflow_port __user *uport) -static int query_port(struct datapath *dp, struct odp_port __user *uport) ++static int query_port(struct datapath *dp, struct xflow_port __user *uport) { - struct odp_port port; + struct xflow_port port; if (copy_from_user(&port, uport, sizeof port)) return -EFAULT; @@@ -1441,8 -1438,8 +1435,8 @@@ error_unlock return put_port(dp->ports[port.port], uport); } - static int - do_list_ports(struct datapath *dp, struct xflow_port __user *uports, int n_ports) -static int do_list_ports(struct datapath *dp, struct odp_port __user *uports, ++static int do_list_ports(struct datapath *dp, struct xflow_port __user *uports, + int n_ports) { int idx = 0; if (n_ports) { @@@ -1458,10 -1455,9 +1452,9 @@@ return idx; } - static int - list_ports(struct datapath *dp, struct xflow_portvec __user *upv) -static int list_ports(struct datapath *dp, struct odp_portvec __user *upv) ++static int list_ports(struct datapath *dp, struct xflow_portvec __user *upv) { - struct odp_portvec pv; + struct xflow_portvec pv; int retval; if (copy_from_user(&pv, upv, sizeof pv)) @@@ -1513,10 -1509,10 +1506,10 @@@ error return error; } - static int - set_port_group(struct datapath *dp, const struct xflow_port_group __user *upg) + static int set_port_group(struct datapath *dp, - const struct odp_port_group __user *upg) ++ const struct xflow_port_group 
__user *upg) { - struct odp_port_group pg; + struct xflow_port_group pg; if (copy_from_user(&pg, upg, sizeof pg)) return -EFAULT; diff --cc datapath/vport-netdev.c index 2a46724b0,517249556..d5de215aa --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@@ -194,8 -183,7 +183,7 @@@ struct kobject *netdev_get_kobj(const s return &netdev_vport->dev->NETDEV_DEV_MEMBER.kobj; } - int - netdev_get_stats(const struct vport *vport, struct xflow_vport_stats *stats) -int netdev_get_stats(const struct vport *vport, struct odp_vport_stats *stats) ++int netdev_get_stats(const struct vport *vport, struct xflow_vport_stats *stats) { const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); const struct net_device_stats *netdev_stats; diff --cc datapath/vport.c index cfdd3e7dc,712c26e5d..edf9af099 --- a/datapath/vport.c +++ b/datapath/vport.c @@@ -174,8 -181,7 +181,7 @@@ void vport_exit(void kfree(dev_table); } - static int - do_vport_add(struct xflow_vport_add *vport_config) -static int do_vport_add(struct odp_vport_add *vport_config) ++static int do_vport_add(struct xflow_vport_add *vport_config) { struct vport *vport; int err = 0; @@@ -213,25 -219,23 +219,23 @@@ out * on device type). This function is for userspace callers and assumes no * locks are held. 
*/ - int - vport_user_add(const struct xflow_vport_add __user *uvport_config) -int vport_user_add(const struct odp_vport_add __user *uvport_config) ++int vport_user_add(const struct xflow_vport_add __user *uvport_config) { - struct odp_vport_add vport_config; + struct xflow_vport_add vport_config; - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_add))) + if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_add))) return -EFAULT; return do_vport_add(&vport_config); } #ifdef CONFIG_COMPAT - int - compat_vport_user_add(struct compat_xflow_vport_add *ucompat) -int compat_vport_user_add(struct compat_odp_vport_add *ucompat) ++int compat_vport_user_add(struct compat_xflow_vport_add *ucompat) { - struct compat_odp_vport_add compat; - struct odp_vport_add vport_config; + struct compat_xflow_vport_add compat; + struct xflow_vport_add vport_config; - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_add))) + if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_add))) return -EFAULT; memcpy(vport_config.port_type, compat.port_type, VPORT_TYPE_SIZE); @@@ -242,8 -246,7 +246,7 @@@ } #endif - static int - do_vport_mod(struct xflow_vport_mod *vport_config) -static int do_vport_mod(struct odp_vport_mod *vport_config) ++static int do_vport_mod(struct xflow_vport_mod *vport_config) { struct vport *vport; int err; @@@ -276,25 -279,23 +279,23 @@@ out * dependent on device type). This function is for userspace callers and * assumes no locks are held. 
*/ - int - vport_user_mod(const struct xflow_vport_mod __user *uvport_config) -int vport_user_mod(const struct odp_vport_mod __user *uvport_config) ++int vport_user_mod(const struct xflow_vport_mod __user *uvport_config) { - struct odp_vport_mod vport_config; + struct xflow_vport_mod vport_config; - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_mod))) + if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_mod))) return -EFAULT; return do_vport_mod(&vport_config); } #ifdef CONFIG_COMPAT - int - compat_vport_user_mod(struct compat_xflow_vport_mod *ucompat) -int compat_vport_user_mod(struct compat_odp_vport_mod *ucompat) ++int compat_vport_user_mod(struct compat_xflow_vport_mod *ucompat) { - struct compat_odp_vport_mod compat; - struct odp_vport_mod vport_config; + struct compat_xflow_vport_mod compat; + struct xflow_vport_mod vport_config; - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_mod))) + if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_mod))) return -EFAULT; memcpy(vport_config.devname, compat.devname, IFNAMSIZ); @@@ -375,10 -375,9 +375,9 @@@ out * Retrieves transmit, receive, and error stats for the given device. This * function is for userspace callers and assumes no locks are held. */ - int - vport_user_stats_get(struct xflow_vport_stats_req __user *ustats_req) -int vport_user_stats_get(struct odp_vport_stats_req __user *ustats_req) ++int vport_user_stats_get(struct xflow_vport_stats_req __user *ustats_req) { - struct odp_vport_stats_req stats_req; + struct xflow_vport_stats_req stats_req; struct vport *vport; int err; @@@ -418,10 -417,9 +417,9 @@@ out * -EOPNOTSUPP. This function is for userspace callers and assumes no locks * are held. 
*/ - int - vport_user_stats_set(struct xflow_vport_stats_req __user *ustats_req) -int vport_user_stats_set(struct odp_vport_stats_req __user *ustats_req) ++int vport_user_stats_set(struct xflow_vport_stats_req __user *ustats_req) { - struct odp_vport_stats_req stats_req; + struct xflow_vport_stats_req stats_req; struct vport *vport; int err; @@@ -456,10 -454,9 +454,9 @@@ out * Retrieves the Ethernet address of the given device. This function is for * userspace callers and assumes no locks are held. */ - int - vport_user_ether_get(struct xflow_vport_ether __user *uvport_ether) -int vport_user_ether_get(struct odp_vport_ether __user *uvport_ether) ++int vport_user_ether_get(struct xflow_vport_ether __user *uvport_ether) { - struct odp_vport_ether vport_ether; + struct xflow_vport_ether vport_ether; struct vport *vport; int err = 0; @@@ -500,10 -497,9 +497,9 @@@ out * -EOPNOTSUPP. This function is for userspace callers and assumes no locks * are held. */ - int - vport_user_ether_set(struct xflow_vport_ether __user *uvport_ether) -int vport_user_ether_set(struct odp_vport_ether __user *uvport_ether) ++int vport_user_ether_set(struct xflow_vport_ether __user *uvport_ether) { - struct odp_vport_ether vport_ether; + struct xflow_vport_ether vport_ether; struct vport *vport; int err; @@@ -537,10 -533,9 +533,9 @@@ out * Retrieves the MTU of the given device. This function is for userspace * callers and assumes no locks are held. */ - int - vport_user_mtu_get(struct xflow_vport_mtu __user *uvport_mtu) -int vport_user_mtu_get(struct odp_vport_mtu __user *uvport_mtu) ++int vport_user_mtu_get(struct xflow_vport_mtu __user *uvport_mtu) { - struct odp_vport_mtu vport_mtu; + struct xflow_vport_mtu vport_mtu; struct vport *vport; int err = 0; @@@ -578,10 -573,9 +573,9 @@@ out * MTU, in which case the result will always be -EOPNOTSUPP. This function is * for userspace callers and assumes no locks are held. 
*/ - int - vport_user_mtu_set(struct xflow_vport_mtu __user *uvport_mtu) -int vport_user_mtu_set(struct odp_vport_mtu __user *uvport_mtu) ++int vport_user_mtu_set(struct xflow_vport_mtu __user *uvport_mtu) { - struct odp_vport_mtu vport_mtu; + struct xflow_vport_mtu vport_mtu; struct vport *vport; int err; @@@ -932,8 -913,7 +913,7 @@@ int vport_set_addr(struct vport *vport * support setting the stats, in which case the result will always be * -EOPNOTSUPP. RTNL lock must be held. */ - int - vport_set_stats(struct vport *vport, struct xflow_vport_stats *stats) -int vport_set_stats(struct vport *vport, struct odp_vport_stats *stats) ++int vport_set_stats(struct vport *vport, struct xflow_vport_stats *stats) { ASSERT_RTNL(); @@@ -1032,11 -1007,10 +1007,10 @@@ struct kobject *vport_get_kobj(const st * * Retrieves transmit, receive, and error stats for the given device. */ - int - vport_get_stats(struct vport *vport, struct xflow_vport_stats *stats) -int vport_get_stats(struct vport *vport, struct odp_vport_stats *stats) ++int vport_get_stats(struct vport *vport, struct xflow_vport_stats *stats) { - struct odp_vport_stats dev_stats; - struct odp_vport_stats *dev_statsp = NULL; + struct xflow_vport_stats dev_stats; + struct xflow_vport_stats *dev_statsp = NULL; int err; if (vport->ops->get_stats) { diff --cc lib/flow.c index bd500857e,490c46bc1..c4bf1ade8 --- a/lib/flow.c +++ b/lib/flow.c @@@ -25,13 -25,13 +25,13 @@@ #include "hash.h" #include "ofpbuf.h" #include "openflow/openflow.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "packets.h" #include "unaligned.h" + #include "vlog.h" #include "xtoxll.h" - #include "vlog.h" - #define THIS_MODULE VLM_flow + VLOG_DEFINE_THIS_MODULE(flow) static struct arp_eth_header * pull_arp(struct ofpbuf *packet) diff --cc lib/netdev-gre.c index b55463f48,aecc77edf..81fd9b25c --- a/lib/netdev-gre.c +++ b/lib/netdev-gre.c @@@ -23,14 -23,14 +23,14 @@@ #include "netdev-provider.h" #include 
"netdev-vport.h" #include "openflow/openflow.h" -#include "openvswitch/datapath-protocol.h" #include "openvswitch/gre.h" +#include "openvswitch/xflow.h" #include "packets.h" #include "socket-util.h" - - #define THIS_MODULE VLM_netdev_gre #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(netdev_gre) + struct netdev_dev_gre { struct netdev_dev netdev_dev; }; @@@ -151,11 -151,11 +151,11 @@@ netdev_gre_create(const char *name, con return err; } - err = netdev_vport_do_ioctl(XFLOW_VPORT_ADD, &ova); - if (err == EEXIST) { - err = netdev_vport_do_ioctl(ODP_VPORT_ADD, &ova); ++ err = netdev_vport_do_ioctl(XFLOW_VPORT_ADD, &ova); + if (err == EBUSY) { VLOG_WARN("%s: destroying existing device", name); - err = netdev_vport_do_ioctl(ODP_VPORT_DEL, ova.devname); + err = netdev_vport_do_ioctl(XFLOW_VPORT_DEL, ova.devname); if (err) { return err; } diff --cc lib/netdev-patch.c index c018f2460,7e8b1990e..0b7c86fe5 --- a/lib/netdev-patch.c +++ b/lib/netdev-patch.c @@@ -23,13 -23,13 +23,13 @@@ #include "netdev-provider.h" #include "netdev-vport.h" #include "openflow/openflow.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "packets.h" #include "socket-util.h" - - #define THIS_MODULE VLM_netdev_patch #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(netdev_patch) + struct netdev_dev_patch { struct netdev_dev netdev_dev; }; @@@ -102,11 -102,11 +102,11 @@@ netdev_patch_create(const char *name, c ovs_strlcpy(ova.devname, name, sizeof ova.devname); ova.config = (char *)peer; - err = netdev_vport_do_ioctl(ODP_VPORT_ADD, &ova); + err = netdev_vport_do_ioctl(XFLOW_VPORT_ADD, &ova); - if (err == EEXIST) { + if (err == EBUSY) { VLOG_WARN("%s: destroying existing device", name); - err = netdev_vport_do_ioctl(ODP_VPORT_DEL, ova.devname); + err = netdev_vport_do_ioctl(XFLOW_VPORT_DEL, ova.devname); if (err) { return err; } diff --cc lib/netdev-vport.c index 28730b5c8,96834115e..260bfbcc7 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@@ -21,13 -21,13 +21,13 @@@ 
#include "list.h" #include "netdev-vport.h" -#include "openvswitch/datapath-protocol.h" +#include "openvswitch/xflow.h" #include "shash.h" #include "socket-util.h" - - #define THIS_MODULE VLM_netdev_vport #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(netdev_vport) + struct netdev_vport_notifier { struct netdev_notifier notifier; struct list list_node; diff --cc lib/ofp-util.c index bc96240a3,fd54a03ed..b542c0a50 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@@ -754,23 -763,40 +763,61 @@@ normalize_match(struct ofp_match *m m->wildcards = htonl(wc); } +/* Converts all of the fields in 'opp' from host to network byte-order. */ +void +hton_ofp_phy_port(struct ofp_phy_port *opp) +{ + opp->port_no = htons(opp->port_no); + opp->config = htonl(opp->config); + opp->state = htonl(opp->state); + opp->curr = htonl(opp->curr); + opp->advertised = htonl(opp->advertised); + opp->supported = htonl(opp->supported); + opp->peer = htonl(opp->peer); +} + +/* Converts all of the fields in 'opp' from network to host byte-order. */ +void +ntoh_ofp_phy_port(struct ofp_phy_port *opp) +{ + /* ntohX and htonX are really the same functions. */ + hton_ofp_phy_port(opp); +} ++ + /* Returns a string that describes 'match' in a very literal way, without + * interpreting its contents except in a very basic fashion. The returned + * string is intended to be fixed-length, so that it is easy to see differences + * between two such strings if one is put above another. This is useful for + * describing changes made by normalize_match(). + * + * The caller must free the returned string (with free()). 
*/ + char * + ofp_match_to_literal_string(const struct ofp_match *match) + { + return xasprintf("wildcards=%#10"PRIx32" " + " in_port=%5"PRId16" " + " dl_src="ETH_ADDR_FMT" " + " dl_dst="ETH_ADDR_FMT" " + " dl_vlan=%5"PRId16" " + " dl_vlan_pcp=%3"PRId8" " + " dl_type=%#6"PRIx16" " + " nw_tos=%#4"PRIx8" " + " nw_proto=%#4"PRIx16" " + " nw_src=%#10"PRIx32" " + " nw_dst=%#10"PRIx32" " + " tp_src=%5"PRId16" " + " tp_dst=%5"PRId16, + ntohl(match->wildcards), + ntohs(match->in_port), + ETH_ADDR_ARGS(match->dl_src), + ETH_ADDR_ARGS(match->dl_dst), + ntohs(match->dl_vlan), + match->dl_vlan_pcp, + ntohs(match->dl_type), + match->nw_tos, + match->nw_proto, + ntohl(match->nw_src), + ntohl(match->nw_dst), + ntohs(match->tp_src), + ntohs(match->tp_dst)); + } diff --cc lib/ofp-util.h index 5a5474815,b4af179d6..60df77c25 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@@ -81,41 -81,8 +81,42 @@@ int validate_actions(const union ofp_ac bool action_outputs_to_port(const union ofp_action *, uint16_t port); void normalize_match(struct ofp_match *); + char *ofp_match_to_literal_string(const struct ofp_match *match); +void hton_ofp_phy_port(struct ofp_phy_port *); +void ntoh_ofp_phy_port(struct ofp_phy_port *); + +/* OpenFlow errors. + * + * OpenFlow errors have two 16-bit parts: a "type" and a "code". A "type" has + * a unique meaning. The "code" values are different for each "type". + * + * We embed OpenFlow errors in the same space as errno values by shifting + * 'type' left 16 bits and adding the 'code'. An "int" value is thus broken + * into a few different ranges: + * + * - 0: success. + * + * - 1...65535: system errno values. + * + * The assumption that system errno values are less than 65536 is true + * on at least Linux, FreeBSD, OpenBSD, and Windows. RFC 1813 defines + * NFSv3-specific errno codes starting at 10000, another hint that this + * is a reasonable assumption. + * + * C and POSIX say that errno values are positive. + * + * - 65536...INT_MAX: OpenFlow errors. 
+ * + * In OpenFlow, a "type" of 0 is valid, but it corresponds to + * OFPET_HELLO_FAILED. That's not a general-purpose error: only the + * vconn library would ever care to send it. So we ignore it. + * + * - negative values: not used. + */ + +/* Returns the OpenFlow error with the specified 'type' and 'code' as an + * integer. */ static inline int ofp_mkerr(uint16_t type, uint16_t code) { diff --cc lib/xfif-linux.c index 68f84a83b,52d73c6bb..7bfa840c1 --- a/lib/xfif-linux.c +++ b/lib/xfif-linux.c @@@ -39,17 -41,16 +40,17 @@@ #include "shash.h" #include "svec.h" #include "util.h" + #include "vlog.h" +#include "xfif-provider.h" - #include "vlog.h" - #define THIS_MODULE VLM_xfif_linux -VLOG_DEFINE_THIS_MODULE(dpif_linux) ++VLOG_DEFINE_THIS_MODULE(xfif_linux) /* Datapath interface for the openvswitch Linux kernel module. */ -struct dpif_linux { - struct dpif dpif; +struct xfif_linux { + struct xfif xfif; int fd; - /* Used by dpif_linux_get_all_names(). */ + /* Used by xfif_linux_get_all_names(). 
*/ char *local_ifname; int minor; @@@ -452,15 -453,27 +453,27 @@@ xfif_linux_get_sflow_probability(const } static int -dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) +xfif_linux_set_sflow_probability(struct xfif *xfif_, uint32_t probability) { - return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability); + return do_ioctl(xfif_, XFLOW_SET_SFLOW_PROBABILITY, &probability); } + static int -dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, ++xfif_linux_queue_to_priority(const struct xfif *xfif OVS_UNUSED, + uint32_t queue_id, uint32_t *priority) + { + if (queue_id < 0xf000) { + *priority = TC_H_MAKE(1 << 16, queue_id + 1); + return 0; + } else { + return EINVAL; + } + } + static int -dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) +xfif_linux_recv(struct xfif *xfif_, struct ofpbuf **bufp) { - struct dpif_linux *dpif = dpif_linux_cast(dpif_); + struct xfif_linux *xfif = xfif_linux_cast(xfif_); struct ofpbuf *buf; int retval; int error; @@@ -512,35 -525,36 +525,36 @@@ const struct xfif_class xfif_linux_clas "system", NULL, NULL, - dpif_linux_enumerate, - dpif_linux_open, - dpif_linux_close, - dpif_linux_get_all_names, - dpif_linux_destroy, - dpif_linux_get_stats, - dpif_linux_get_drop_frags, - dpif_linux_set_drop_frags, - dpif_linux_port_add, - dpif_linux_port_del, - dpif_linux_port_query_by_number, - dpif_linux_port_query_by_name, - dpif_linux_port_list, - dpif_linux_port_poll, - dpif_linux_port_poll_wait, - dpif_linux_port_group_get, - dpif_linux_port_group_set, - dpif_linux_flow_get, - dpif_linux_flow_put, - dpif_linux_flow_del, - dpif_linux_flow_flush, - dpif_linux_flow_list, - dpif_linux_execute, - dpif_linux_recv_get_mask, - dpif_linux_recv_set_mask, - dpif_linux_get_sflow_probability, - dpif_linux_set_sflow_probability, - dpif_linux_queue_to_priority, - dpif_linux_recv, - dpif_linux_recv_wait, + xfif_linux_enumerate, + xfif_linux_open, + xfif_linux_close, + xfif_linux_get_all_names, + xfif_linux_destroy, + 
xfif_linux_get_stats, + xfif_linux_get_drop_frags, + xfif_linux_set_drop_frags, + xfif_linux_port_add, + xfif_linux_port_del, + xfif_linux_port_query_by_number, + xfif_linux_port_query_by_name, + xfif_linux_port_list, + xfif_linux_port_poll, + xfif_linux_port_poll_wait, + xfif_linux_port_group_get, + xfif_linux_port_group_set, + xfif_linux_flow_get, + xfif_linux_flow_put, + xfif_linux_flow_del, + xfif_linux_flow_flush, + xfif_linux_flow_list, + xfif_linux_execute, + xfif_linux_recv_get_mask, + xfif_linux_recv_set_mask, + xfif_linux_get_sflow_probability, + xfif_linux_set_sflow_probability, ++ xfif_linux_queue_to_priority, + xfif_linux_recv, + xfif_linux_recv_wait, }; static int get_openvswitch_major(void); diff --cc lib/xfif-netdev.c index 1b6cf9936,000000000..58b054b9a mode 100644,000000..100644 --- a/lib/xfif-netdev.c +++ b/lib/xfif-netdev.c @@@ -1,1379 -1,0 +1,1364 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "xfif.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "csum.h" +#include "flow.h" +#include "hmap.h" +#include "list.h" +#include "netdev.h" +#include "xflow-util.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "queue.h" +#include "timeval.h" +#include "util.h" ++#include "vlog.h" +#include "xfif-provider.h" + - #include "vlog.h" - #define THIS_MODULE VLM_xfif_netdev ++VLOG_DEFINE_THIS_MODULE(xfif_netdev) + +/* Configuration parameters. */ +enum { N_QUEUES = 2 }; /* Number of queues for xfif_recv(). */ +enum { MAX_QUEUE_LEN = 100 }; /* Maximum number of packets per queue. */ +enum { N_GROUPS = 16 }; /* Number of port groups. */ +enum { MAX_PORTS = 256 }; /* Maximum number of ports. */ +enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ + +/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP + * headers to be aligned on a 4-byte boundary. */ +enum { XF_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; + +/* Datapath based on the network device interface from netdev.h. */ +struct xf_netdev { + struct list node; + int xf_idx; + int open_cnt; + bool destroyed; + + bool drop_frags; /* Drop all IP fragments, if true. */ + struct ovs_queue queues[N_QUEUES]; /* Messages queued for xfif_recv(). */ + struct hmap flow_table; /* Flow table. */ + struct xflow_port_group groups[N_GROUPS]; + + /* Statistics. */ + long long int n_frags; /* Number of dropped IP fragments. */ + long long int n_hit; /* Number of flow table matches. */ + long long int n_missed; /* Number of flow table misses. */ + long long int n_lost; /* Number of misses not passed to client. */ + + /* Ports. */ + int n_ports; + struct xf_netdev_port *ports[MAX_PORTS]; + struct list port_list; + unsigned int serial; +}; + +/* A port in a netdev-based datapath. 
*/ +struct xf_netdev_port { + int port_no; /* Index into xf_netdev's 'ports'. */ + struct list node; /* Element in xf_netdev's 'port_list'. */ + struct netdev *netdev; + bool internal; /* Internal port (as XFLOW_PORT_INTERNAL)? */ +}; + +/* A flow in xf_netdev's 'flow_table'. */ +struct xf_netdev_flow { + struct hmap_node node; /* Element in xf_netdev's 'flow_table'. */ + struct xflow_key key; + + /* Statistics. */ + struct timespec used; /* Last used time. */ + long long int packet_count; /* Number of packets matched. */ + long long int byte_count; /* Number of bytes matched. */ + uint8_t ip_tos; /* IP TOS value. */ + uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ + + /* Actions. */ + union xflow_action *actions; + unsigned int n_actions; +}; + +/* Interface to netdev-based datapath. */ +struct xfif_netdev { + struct xfif xfif; + struct xf_netdev *xf; + int listen_mask; + unsigned int xf_serial; +}; + +/* All netdev-based datapaths. */ +static struct xf_netdev *xf_netdevs[256]; +struct list xf_netdev_list = LIST_INITIALIZER(&xf_netdev_list); +enum { N_XF_NETDEVS = ARRAY_SIZE(xf_netdevs) }; + +/* Maximum port MTU seen so far. 
*/ +static int max_mtu = ETH_PAYLOAD_MAX; + +static int get_port_by_number(struct xf_netdev *, uint16_t port_no, + struct xf_netdev_port **portp); +static int get_port_by_name(struct xf_netdev *, const char *devname, + struct xf_netdev_port **portp); +static void xf_netdev_free(struct xf_netdev *); +static void xf_netdev_flow_flush(struct xf_netdev *); +static int do_add_port(struct xf_netdev *, const char *devname, uint16_t flags, + uint16_t port_no); +static int do_del_port(struct xf_netdev *, uint16_t port_no); +static int xf_netdev_output_control(struct xf_netdev *, const struct ofpbuf *, + int queue_no, int port_no, uint32_t arg); +static int xf_netdev_execute_actions(struct xf_netdev *, + struct ofpbuf *, struct xflow_key *, + const union xflow_action *, int n); + +static struct xfif_netdev * +xfif_netdev_cast(const struct xfif *xfif) +{ + xfif_assert_class(xfif, &xfif_netdev_class); + return CONTAINER_OF(xfif, struct xfif_netdev, xfif); +} + +static struct xf_netdev * +get_xf_netdev(const struct xfif *xfif) +{ + return xfif_netdev_cast(xfif)->xf; +} + +static int +name_to_xf_idx(const char *name) +{ + if (!strncmp(name, "xf", 2) && isdigit((unsigned char)name[2])) { + int xf_idx = atoi(name + 2); + if (xf_idx >= 0 && xf_idx < N_XF_NETDEVS) { + return xf_idx; + } + } + return -1; +} + +static struct xf_netdev * +find_xf_netdev(const char *name) +{ + int xf_idx; + size_t i; + + xf_idx = name_to_xf_idx(name); + if (xf_idx >= 0) { + return xf_netdevs[xf_idx]; + } + + for (i = 0; i < N_XF_NETDEVS; i++) { + struct xf_netdev *xf = xf_netdevs[i]; + if (xf) { + struct xf_netdev_port *port; + if (!get_port_by_name(xf, name, &port)) { + return xf; + } + } + } + return NULL; +} + +static struct xfif * +create_xfif_netdev(struct xf_netdev *xf) +{ + struct xfif_netdev *xfif; + char *xfname; + + xf->open_cnt++; + + xfname = xasprintf("xf%d", xf->xf_idx); + xfif = xmalloc(sizeof *xfif); + xfif_init(&xfif->xfif, &xfif_netdev_class, xfname, xf->xf_idx, xf->xf_idx); + xfif->xf 
= xf; + xfif->listen_mask = 0; + xfif->xf_serial = xf->serial; + free(xfname); + + return &xfif->xfif; +} + +static int +create_xf_netdev(const char *name, int xf_idx, struct xfif **xfifp) +{ + struct xf_netdev *xf; + int error; + int i; + + if (xf_netdevs[xf_idx]) { + return EBUSY; + } + + /* Create datapath. */ + xf_netdevs[xf_idx] = xf = xzalloc(sizeof *xf); + list_push_back(&xf_netdev_list, &xf->node); + xf->xf_idx = xf_idx; + xf->open_cnt = 0; + xf->drop_frags = false; + for (i = 0; i < N_QUEUES; i++) { + queue_init(&xf->queues[i]); + } + hmap_init(&xf->flow_table); + for (i = 0; i < N_GROUPS; i++) { + xf->groups[i].ports = NULL; + xf->groups[i].n_ports = 0; + xf->groups[i].group = i; + } + list_init(&xf->port_list); + error = do_add_port(xf, name, XFLOW_PORT_INTERNAL, XFLOWP_LOCAL); + if (error) { + xf_netdev_free(xf); + return ENODEV; + } + + *xfifp = create_xfif_netdev(xf); + return 0; +} + +static int +xfif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create, + struct xfif **xfifp) +{ + if (create) { + if (find_xf_netdev(name)) { + return EEXIST; + } else { + int xf_idx = name_to_xf_idx(name); + if (xf_idx >= 0) { + return create_xf_netdev(name, xf_idx, xfifp); + } else { + /* Scan for unused xf_idx number. */ + for (xf_idx = 0; xf_idx < N_XF_NETDEVS; xf_idx++) { + int error = create_xf_netdev(name, xf_idx, xfifp); + if (error != EBUSY) { + return error; + } + } + + /* All datapath numbers in use. 
*/ + return ENOBUFS; + } + } + } else { + struct xf_netdev *xf = find_xf_netdev(name); + if (xf) { + *xfifp = create_xfif_netdev(xf); + return 0; + } else { + return ENODEV; + } + } +} + +static void +xf_netdev_free(struct xf_netdev *xf) +{ + int i; + + xf_netdev_flow_flush(xf); + while (xf->n_ports > 0) { + struct xf_netdev_port *port = CONTAINER_OF( + xf->port_list.next, struct xf_netdev_port, node); + do_del_port(xf, port->port_no); + } + for (i = 0; i < N_QUEUES; i++) { + queue_destroy(&xf->queues[i]); + } + hmap_destroy(&xf->flow_table); + for (i = 0; i < N_GROUPS; i++) { + free(xf->groups[i].ports); + } + xf_netdevs[xf->xf_idx] = NULL; + list_remove(&xf->node); + free(xf); +} + +static void +xfif_netdev_close(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + assert(xf->open_cnt > 0); + if (--xf->open_cnt == 0 && xf->destroyed) { + xf_netdev_free(xf); + } + free(xfif); +} + +static int +xfif_netdev_destroy(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + xf->destroyed = true; + return 0; +} + +static int +xfif_netdev_get_stats(const struct xfif *xfif, struct xflow_stats *stats) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + memset(stats, 0, sizeof *stats); + stats->n_flows = hmap_count(&xf->flow_table); + stats->cur_capacity = hmap_capacity(&xf->flow_table); + stats->max_capacity = MAX_FLOWS; + stats->n_ports = xf->n_ports; + stats->max_ports = MAX_PORTS; + stats->max_groups = N_GROUPS; + stats->n_frags = xf->n_frags; + stats->n_hit = xf->n_hit; + stats->n_missed = xf->n_missed; + stats->n_lost = xf->n_lost; + stats->max_miss_queue = MAX_QUEUE_LEN; + stats->max_action_queue = MAX_QUEUE_LEN; + return 0; +} + +static int +xfif_netdev_get_drop_frags(const struct xfif *xfif, bool *drop_fragsp) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + *drop_fragsp = xf->drop_frags; + return 0; +} + +static int +xfif_netdev_set_drop_frags(struct xfif *xfif, bool drop_frags) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + 
xf->drop_frags = drop_frags; + return 0; +} + +static int +do_add_port(struct xf_netdev *xf, const char *devname, uint16_t flags, + uint16_t port_no) +{ + bool internal = (flags & XFLOW_PORT_INTERNAL) != 0; + struct xf_netdev_port *port; + struct netdev_options netdev_options; + struct netdev *netdev; + int mtu; + int error; + + /* XXX reject devices already in some xf_netdev. */ + + /* Open and validate network device. */ + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; + if (internal) { + netdev_options.type = "tap"; + } + + error = netdev_open(&netdev_options, &netdev); + if (error) { + return error; + } + /* XXX reject loopback devices */ + /* XXX reject non-Ethernet devices */ + + error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false); + if (error) { + netdev_close(netdev); + return error; + } + + port = xmalloc(sizeof *port); + port->port_no = port_no; + port->netdev = netdev; + port->internal = internal; + + netdev_get_mtu(netdev, &mtu); + if (mtu > max_mtu) { + max_mtu = mtu; + } + + list_push_back(&xf->port_list, &port->node); + xf->ports[port_no] = port; + xf->n_ports++; + xf->serial++; + + return 0; +} + +static int +xfif_netdev_port_add(struct xfif *xfif, const char *devname, uint16_t flags, + uint16_t *port_nop) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + int port_no; + + for (port_no = 0; port_no < MAX_PORTS; port_no++) { + if (!xf->ports[port_no]) { + *port_nop = port_no; + return do_add_port(xf, devname, flags, port_no); + } + } + return EFBIG; +} + +static int +xfif_netdev_port_del(struct xfif *xfif, uint16_t port_no) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + return port_no == XFLOWP_LOCAL ? 
EINVAL : do_del_port(xf, port_no); +} + +static bool +is_valid_port_number(uint16_t port_no) +{ + return port_no < MAX_PORTS; +} + +static int +get_port_by_number(struct xf_netdev *xf, + uint16_t port_no, struct xf_netdev_port **portp) +{ + if (!is_valid_port_number(port_no)) { + *portp = NULL; + return EINVAL; + } else { + *portp = xf->ports[port_no]; + return *portp ? 0 : ENOENT; + } +} + +static int +get_port_by_name(struct xf_netdev *xf, + const char *devname, struct xf_netdev_port **portp) +{ + struct xf_netdev_port *port; + + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + if (!strcmp(netdev_get_name(port->netdev), devname)) { + *portp = port; + return 0; + } + } + return ENOENT; +} + +static int +do_del_port(struct xf_netdev *xf, uint16_t port_no) +{ + struct xf_netdev_port *port; + char *name; + int error; + + error = get_port_by_number(xf, port_no, &port); + if (error) { + return error; + } + + list_remove(&port->node); + xf->ports[port->port_no] = NULL; + xf->n_ports--; + xf->serial++; + + name = xstrdup(netdev_get_name(port->netdev)); + netdev_close(port->netdev); + + free(name); + free(port); + + return 0; +} + +static void +answer_port_query(const struct xf_netdev_port *port, struct xflow_port *xflow_port) +{ + memset(xflow_port, 0, sizeof *xflow_port); + ovs_strlcpy(xflow_port->devname, netdev_get_name(port->netdev), + sizeof xflow_port->devname); + xflow_port->port = port->port_no; + xflow_port->flags = port->internal ? 
XFLOW_PORT_INTERNAL : 0; +} + +static int +xfif_netdev_port_query_by_number(const struct xfif *xfif, uint16_t port_no, + struct xflow_port *xflow_port) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int error; + + error = get_port_by_number(xf, port_no, &port); + if (!error) { + answer_port_query(port, xflow_port); + } + return error; +} + +static int +xfif_netdev_port_query_by_name(const struct xfif *xfif, const char *devname, + struct xflow_port *xflow_port) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int error; + + error = get_port_by_name(xf, devname, &port); + if (!error) { + answer_port_query(port, xflow_port); + } + return error; +} + +static void +xf_netdev_free_flow(struct xf_netdev *xf, struct xf_netdev_flow *flow) +{ + hmap_remove(&xf->flow_table, &flow->node); + free(flow->actions); + free(flow); +} + +static void +xf_netdev_flow_flush(struct xf_netdev *xf) +{ + struct xf_netdev_flow *flow, *next; + + HMAP_FOR_EACH_SAFE (flow, next, struct xf_netdev_flow, node, + &xf->flow_table) { + xf_netdev_free_flow(xf, flow); + } +} + +static int +xfif_netdev_flow_flush(struct xfif *xfif) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + xf_netdev_flow_flush(xf); + return 0; +} + +static int +xfif_netdev_port_list(const struct xfif *xfif, struct xflow_port *ports, int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_port *port; + int i; + + i = 0; + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + struct xflow_port *xflow_port = &ports[i]; + if (i >= n) { + break; + } + answer_port_query(port, xflow_port); + i++; + } + return xf->n_ports; +} + +static int +xfif_netdev_port_poll(const struct xfif *xfif_, char **devnamep OVS_UNUSED) +{ + struct xfif_netdev *xfif = xfif_netdev_cast(xfif_); + if (xfif->xf_serial != xfif->xf->serial) { + xfif->xf_serial = xfif->xf->serial; + return ENOBUFS; + } else { + return EAGAIN; + } +} + +static void 
+xfif_netdev_port_poll_wait(const struct xfif *xfif_) +{ + struct xfif_netdev *xfif = xfif_netdev_cast(xfif_); + if (xfif->xf_serial != xfif->xf->serial) { + poll_immediate_wake(); + } +} + +static int +get_port_group(const struct xfif *xfif, int group_no, + struct xflow_port_group **groupp) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + + if (group_no >= 0 && group_no < N_GROUPS) { + *groupp = &xf->groups[group_no]; + return 0; + } else { + *groupp = NULL; + return EINVAL; + } +} + +static int +xfif_netdev_port_group_get(const struct xfif *xfif, int group_no, + uint16_t ports[], int n) +{ + struct xflow_port_group *group; + int error; + + if (n < 0) { + return -EINVAL; + } + + error = get_port_group(xfif, group_no, &group); + if (!error) { + memcpy(ports, group->ports, MIN(n, group->n_ports) * sizeof *ports); + return group->n_ports; + } else { + return -error; + } +} + +static int +xfif_netdev_port_group_set(struct xfif *xfif, int group_no, + const uint16_t ports[], int n) +{ + struct xflow_port_group *group; + int error; + + if (n < 0 || n > MAX_PORTS) { + return EINVAL; + } + + error = get_port_group(xfif, group_no, &group); + if (!error) { + free(group->ports); + group->ports = xmemdup(ports, n * sizeof *group->ports); + group->n_ports = n; + group->group = group_no; + } + return error; +} + +static struct xf_netdev_flow * +xf_netdev_lookup_flow(const struct xf_netdev *xf, + const struct xflow_key *key) +{ + struct xf_netdev_flow *flow; + + HMAP_FOR_EACH_WITH_HASH (flow, struct xf_netdev_flow, node, + xflow_key_hash(key, 0), &xf->flow_table) { + if (xflow_key_equal(&flow->key, key)) { + return flow; + } + } + return NULL; +} + +static void +answer_flow_query(struct xf_netdev_flow *flow, uint32_t query_flags, + struct xflow_flow *xflow_flow) +{ + if (flow) { + xflow_flow->key = flow->key; + xflow_flow->stats.n_packets = flow->packet_count; + xflow_flow->stats.n_bytes = flow->byte_count; + xflow_flow->stats.used_sec = flow->used.tv_sec; + 
xflow_flow->stats.used_nsec = flow->used.tv_nsec; + xflow_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl); + xflow_flow->stats.ip_tos = flow->ip_tos; + xflow_flow->stats.error = 0; + if (xflow_flow->n_actions > 0) { + unsigned int n = MIN(xflow_flow->n_actions, flow->n_actions); + memcpy(xflow_flow->actions, flow->actions, + n * sizeof *xflow_flow->actions); + xflow_flow->n_actions = flow->n_actions; + } + + if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) { + flow->tcp_ctl = 0; + } + + } else { + xflow_flow->stats.error = ENOENT; + } +} + +static int +xfif_netdev_flow_get(const struct xfif *xfif, struct xflow_flow flows[], int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + int i; + + for (i = 0; i < n; i++) { + struct xflow_flow *xflow_flow = &flows[i]; + answer_flow_query(xf_netdev_lookup_flow(xf, &xflow_flow->key), + xflow_flow->flags, xflow_flow); + } + return 0; +} + +static int +xfif_netdev_validate_actions(const union xflow_action *actions, int n_actions, + bool *mutates) +{ + unsigned int i; + + *mutates = false; + for (i = 0; i < n_actions; i++) { + const union xflow_action *a = &actions[i]; + switch (a->type) { + case XFLOWAT_OUTPUT: + if (a->output.port >= MAX_PORTS) { + return EINVAL; + } + break; + + case XFLOWAT_OUTPUT_GROUP: + *mutates = true; + if (a->output_group.group >= N_GROUPS) { + return EINVAL; + } + break; + + case XFLOWAT_CONTROLLER: + break; + + case XFLOWAT_SET_DL_TCI: + *mutates = true; + if (a->dl_tci.mask != htons(VLAN_VID_MASK) + && a->dl_tci.mask != htons(VLAN_PCP_MASK) + && a->dl_tci.mask != htons(VLAN_VID_MASK | VLAN_PCP_MASK)) { + return EINVAL; + } + if (a->dl_tci.tci & ~a->dl_tci.mask){ + return EINVAL; + } + break; + + case XFLOWAT_SET_NW_TOS: + *mutates = true; + if (a->nw_tos.nw_tos & IP_ECN_MASK) { + return EINVAL; + } + break; + + case XFLOWAT_STRIP_VLAN: + case XFLOWAT_SET_DL_SRC: + case XFLOWAT_SET_DL_DST: + case XFLOWAT_SET_NW_SRC: + case XFLOWAT_SET_NW_DST: + case XFLOWAT_SET_TP_SRC: + case XFLOWAT_SET_TP_DST: + 
*mutates = true; + break; + + default: + return EOPNOTSUPP; + } + } + return 0; +} + +static int +set_flow_actions(struct xf_netdev_flow *flow, struct xflow_flow *xflow_flow) +{ + size_t n_bytes; + bool mutates; + int error; + + if (xflow_flow->n_actions >= 4096 / sizeof *xflow_flow->actions) { + return EINVAL; + } + error = xfif_netdev_validate_actions(xflow_flow->actions, + xflow_flow->n_actions, &mutates); + if (error) { + return error; + } + + n_bytes = xflow_flow->n_actions * sizeof *flow->actions; + flow->actions = xrealloc(flow->actions, n_bytes); + flow->n_actions = xflow_flow->n_actions; + memcpy(flow->actions, xflow_flow->actions, n_bytes); + return 0; +} + +static int +add_flow(struct xfif *xfif, struct xflow_flow *xflow_flow) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + int error; + + flow = xzalloc(sizeof *flow); + flow->key = xflow_flow->key; + + error = set_flow_actions(flow, xflow_flow); + if (error) { + free(flow); + return error; + } + + hmap_insert(&xf->flow_table, &flow->node, + xflow_key_hash(&flow->key, 0)); + return 0; +} + +static void +clear_stats(struct xf_netdev_flow *flow) +{ + flow->used.tv_sec = 0; + flow->used.tv_nsec = 0; + flow->packet_count = 0; + flow->byte_count = 0; + flow->ip_tos = 0; + flow->tcp_ctl = 0; +} + +static int +xfif_netdev_flow_put(struct xfif *xfif, struct xflow_flow_put *put) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + + flow = xf_netdev_lookup_flow(xf, &put->flow.key); + if (!flow) { + if (put->flags & XFLOWPF_CREATE) { + if (hmap_count(&xf->flow_table) < MAX_FLOWS) { + return add_flow(xfif, &put->flow); + } else { + return EFBIG; + } + } else { + return ENOENT; + } + } else { + if (put->flags & XFLOWPF_MODIFY) { + int error = set_flow_actions(flow, &put->flow); + if (!error && put->flags & XFLOWPF_ZERO_STATS) { + clear_stats(flow); + } + return error; + } else { + return EEXIST; + } + } +} + + +static int +xfif_netdev_flow_del(struct xfif 
*xfif, struct xflow_flow *xflow_flow) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + + flow = xf_netdev_lookup_flow(xf, &xflow_flow->key); + if (flow) { + answer_flow_query(flow, 0, xflow_flow); + xf_netdev_free_flow(xf, flow); + return 0; + } else { + return ENOENT; + } +} + +static int +xfif_netdev_flow_list(const struct xfif *xfif, struct xflow_flow flows[], int n) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct xf_netdev_flow *flow; + int i; + + i = 0; + HMAP_FOR_EACH (flow, struct xf_netdev_flow, node, &xf->flow_table) { + if (i >= n) { + break; + } + answer_flow_query(flow, 0, &flows[i++]); + } + return hmap_count(&xf->flow_table); +} + +static int +xfif_netdev_execute(struct xfif *xfif, uint16_t in_port, + const union xflow_action actions[], int n_actions, + const struct ofpbuf *packet) +{ + struct xf_netdev *xf = get_xf_netdev(xfif); + struct ofpbuf copy; + bool mutates; + struct xflow_key key; + flow_t flow; + int error; + + if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) { + return EINVAL; + } + + error = xfif_netdev_validate_actions(actions, n_actions, &mutates); + if (error) { + return error; + } + + if (mutates) { + /* We need a deep copy of 'packet' since we're going to modify its + * data. */ + ofpbuf_init(©, XF_NETDEV_HEADROOM + packet->size); + copy.data = (char*)copy.base + XF_NETDEV_HEADROOM; + ofpbuf_put(©, packet->data, packet->size); + } else { + /* We still need a shallow copy of 'packet', even though we won't + * modify its data, because flow_extract() modifies packet->l2, etc. + * We could probably get away with modifying those but it's more polite + * if we don't. 
*/ + copy = *packet; + } + flow_extract(©, 0, in_port, &flow); + xflow_key_from_flow(&key, &flow); + error = xf_netdev_execute_actions(xf, ©, &key, actions, n_actions); + if (mutates) { + ofpbuf_uninit(©); + } + return error; +} + +static int +xfif_netdev_recv_get_mask(const struct xfif *xfif, int *listen_mask) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + *listen_mask = xfif_netdev->listen_mask; + return 0; +} + +static int +xfif_netdev_recv_set_mask(struct xfif *xfif, int listen_mask) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + if (!(listen_mask & ~XFLOWL_ALL)) { + xfif_netdev->listen_mask = listen_mask; + return 0; + } else { + return EINVAL; + } +} + +static struct ovs_queue * +find_nonempty_queue(struct xfif *xfif) +{ + struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif); + struct xf_netdev *xf = get_xf_netdev(xfif); + int mask = xfif_netdev->listen_mask; + int i; + + for (i = 0; i < N_QUEUES; i++) { + struct ovs_queue *q = &xf->queues[i]; + if (q->n && mask & (1u << i)) { + return q; + } + } + return NULL; +} + +static int +xfif_netdev_recv(struct xfif *xfif, struct ofpbuf **bufp) +{ + struct ovs_queue *q = find_nonempty_queue(xfif); + if (q) { + *bufp = queue_pop_head(q); + return 0; + } else { + return EAGAIN; + } +} + +static void +xfif_netdev_recv_wait(struct xfif *xfif) +{ + struct ovs_queue *q = find_nonempty_queue(xfif); + if (q) { + poll_immediate_wake(); + } else { + /* No messages ready to be received, and xf_wait() will ensure that we + * wake up to queue new messages, so there is nothing to do. 
*/ + } +} + +static void +xf_netdev_flow_used(struct xf_netdev_flow *flow, + const struct xflow_key *key, + const struct ofpbuf *packet) +{ + time_timespec(&flow->used); + flow->packet_count++; + flow->byte_count += packet->size; + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + flow->ip_tos = nh->ip_tos; + + if (key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + flow->tcp_ctl |= th->tcp_ctl; + } + } +} + +static void +xf_netdev_port_input(struct xf_netdev *xf, struct xf_netdev_port *port, + struct ofpbuf *packet) +{ + struct xf_netdev_flow *flow; + struct xflow_key key; + flow_t f; + + if (flow_extract(packet, 0, port->port_no, &f) && xf->drop_frags) { + xf->n_frags++; + return; + } + xflow_key_from_flow(&key, &f); + + flow = xf_netdev_lookup_flow(xf, &key); + if (flow) { + xf_netdev_flow_used(flow, &key, packet); + xf_netdev_execute_actions(xf, packet, &key, + flow->actions, flow->n_actions); + xf->n_hit++; + } else { + xf->n_missed++; + xf_netdev_output_control(xf, packet, _XFLOWL_MISS_NR, port->port_no, 0); + } +} + +static void +xf_netdev_run(void) +{ + struct ofpbuf packet; + struct xf_netdev *xf; + + ofpbuf_init(&packet, XF_NETDEV_HEADROOM + max_mtu); + LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) { + struct xf_netdev_port *port; + + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + int error; + + /* Reset packet contents. 
*/ + packet.data = (char*)packet.base + XF_NETDEV_HEADROOM; + packet.size = 0; + + error = netdev_recv(port->netdev, &packet); + if (!error) { + xf_netdev_port_input(xf, port, &packet); + } else if (error != EAGAIN) { + struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "error receiving data from %s: %s", + netdev_get_name(port->netdev), strerror(error)); + } + } + } + ofpbuf_uninit(&packet); +} + +static void +xf_netdev_wait(void) +{ + struct xf_netdev *xf; + + LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) { + struct xf_netdev_port *port; + LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) { + netdev_recv_wait(port->netdev); + } + } +} + + +/* Modify or add a 802.1Q header in 'packet' according to 'a'. */ +static void +xf_netdev_set_dl_tci(struct ofpbuf *packet, struct xflow_key *key, + const struct xflow_action_dl_tci *a) +{ + struct vlan_eth_header *veh; + + if (key->dl_tci) { + veh = packet->l2; + veh->veth_tci = (veh->veth_tci & ~a->mask) | a->tci; + } else { + /* Insert new 802.1Q header. 
*/ + struct eth_header *eh = packet->l2; + struct vlan_eth_header tmp; + memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); + memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); + tmp.veth_type = htons(ETH_TYPE_VLAN); + tmp.veth_tci = htons(a->tci); + tmp.veth_next_type = eh->eth_type; + + veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); + memcpy(veh, &tmp, sizeof tmp); + packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN; + } + + key->dl_tci = veh->veth_tci | htons(XFLOW_TCI_PRESENT); +} + +static void +xf_netdev_strip_vlan(struct ofpbuf *packet, struct xflow_key *key) +{ + struct vlan_eth_header *veh = packet->l2; + if (veh->veth_type == htons(ETH_TYPE_VLAN)) { + struct eth_header tmp; + + memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); + memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); + tmp.eth_type = veh->veth_next_type; + + packet->size -= VLAN_HEADER_LEN; + packet->data = (char*)packet->data + VLAN_HEADER_LEN; + packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; + memcpy(packet->data, &tmp, sizeof tmp); + + key->dl_tci = htons(0); + } +} + +static void - xf_netdev_set_dl_src(struct ofpbuf *packet, struct xflow_key *key, ++xf_netdev_set_dl_src(struct ofpbuf *packet, + const uint8_t dl_addr[ETH_ADDR_LEN]) +{ + struct eth_header *eh = packet->l2; + memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src); - memcpy(key->dl_src, dl_addr, sizeof key->dl_src); +} + +static void - xf_netdev_set_dl_dst(struct ofpbuf *packet, struct xflow_key *key, ++xf_netdev_set_dl_dst(struct ofpbuf *packet, + const uint8_t dl_addr[ETH_ADDR_LEN]) +{ + struct eth_header *eh = packet->l2; + memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst); - memcpy(key->dl_dst, dl_addr, sizeof key->dl_dst); +} + +static void - xf_netdev_set_nw_addr(struct ofpbuf *packet, struct xflow_key *key, ++xf_netdev_set_nw_addr(struct ofpbuf *packet, const struct xflow_key *key, + const struct xflow_action_nw_addr *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint32_t *field; + + 
field = a->type == XFLOWAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst; + if (key->nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = packet->l4; + th->tcp_csum = recalc_csum32(th->tcp_csum, *field, a->nw_addr); + } else if (key->nw_proto == IP_TYPE_UDP) { + struct udp_header *uh = packet->l4; + if (uh->udp_csum) { + uh->udp_csum = recalc_csum32(uh->udp_csum, *field, a->nw_addr); + if (!uh->udp_csum) { + uh->udp_csum = 0xffff; + } + } + } + nh->ip_csum = recalc_csum32(nh->ip_csum, *field, a->nw_addr); + *field = a->nw_addr; - - if (a->type == XFLOWAT_SET_NW_SRC) { - key->nw_src = a->type; - } else { - key->nw_dst = a->type; - } + } +} + +static void - xf_netdev_set_nw_tos(struct ofpbuf *packet, struct xflow_key *key, ++xf_netdev_set_nw_tos(struct ofpbuf *packet, const struct xflow_key *key, + const struct xflow_action_nw_tos *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint8_t *field = &nh->ip_tos; + + /* Set the DSCP bits and preserve the ECN bits. */ + uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK); + + nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field), + htons((uint16_t)a->nw_tos)); + *field = new; - key->nw_tos = a->nw_tos; + } +} + +static void - xf_netdev_set_tp_port(struct ofpbuf *packet, struct xflow_key *key, ++xf_netdev_set_tp_port(struct ofpbuf *packet, const struct xflow_key *key, + const struct xflow_action_tp_port *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + uint16_t *field; + if (key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + field = a->type == XFLOWAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst; + th->tcp_csum = recalc_csum16(th->tcp_csum, *field, a->tp_port); + *field = a->tp_port; + } else if (key->nw_proto == IPPROTO_UDP) { + struct udp_header *uh = packet->l4; + field = a->type == XFLOWAT_SET_TP_SRC ? 
&uh->udp_src : &uh->udp_dst; + uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port); + *field = a->tp_port; + } else { + return; + } - - if (a->type == XFLOWAT_SET_TP_SRC) { - key->tp_src = a->tp_port; - } else { - key->tp_dst = a->tp_port; - } + } +} + +static void +xf_netdev_output_port(struct xf_netdev *xf, struct ofpbuf *packet, + uint16_t out_port) +{ + struct xf_netdev_port *p = xf->ports[out_port]; + if (p) { + netdev_send(p->netdev, packet); + } +} + +static void +xf_netdev_output_group(struct xf_netdev *xf, uint16_t group, uint16_t in_port, + struct ofpbuf *packet) +{ + struct xflow_port_group *g = &xf->groups[group]; + int i; + + for (i = 0; i < g->n_ports; i++) { + uint16_t out_port = g->ports[i]; + if (out_port != in_port) { + xf_netdev_output_port(xf, packet, out_port); + } + } +} + +static int +xf_netdev_output_control(struct xf_netdev *xf, const struct ofpbuf *packet, + int queue_no, int port_no, uint32_t arg) +{ + struct ovs_queue *q = &xf->queues[queue_no]; + struct xflow_msg *header; + struct ofpbuf *msg; + size_t msg_size; + + if (q->n >= MAX_QUEUE_LEN) { + xf->n_lost++; + return ENOBUFS; + } + + msg_size = sizeof *header + packet->size; + msg = ofpbuf_new(msg_size + XFIF_RECV_MSG_PADDING); + header = ofpbuf_put_uninit(msg, sizeof *header); + ofpbuf_reserve(msg, XFIF_RECV_MSG_PADDING); + header->type = queue_no; + header->length = msg_size; + header->port = port_no; + header->arg = arg; + ofpbuf_put(msg, packet->data, packet->size); + queue_push_tail(q, msg); + + return 0; +} + +static int +xf_netdev_execute_actions(struct xf_netdev *xf, + struct ofpbuf *packet, struct xflow_key *key, + const union xflow_action *actions, int n_actions) +{ + int i; + for (i = 0; i < n_actions; i++) { + const union xflow_action *a = &actions[i]; + + switch (a->type) { + case XFLOWAT_OUTPUT: + xf_netdev_output_port(xf, packet, a->output.port); + break; + + case XFLOWAT_OUTPUT_GROUP: + xf_netdev_output_group(xf, a->output_group.group, key->in_port, + 
packet); + break; + + case XFLOWAT_CONTROLLER: + xf_netdev_output_control(xf, packet, _XFLOWL_ACTION_NR, + key->in_port, a->controller.arg); + break; + + case XFLOWAT_SET_DL_TCI: + xf_netdev_set_dl_tci(packet, key, &a->dl_tci); + break; + + case XFLOWAT_STRIP_VLAN: + xf_netdev_strip_vlan(packet, key); + break; + + case XFLOWAT_SET_DL_SRC: - xf_netdev_set_dl_src(packet, key, a->dl_addr.dl_addr); ++ xf_netdev_set_dl_src(packet, a->dl_addr.dl_addr); + break; + + case XFLOWAT_SET_DL_DST: - xf_netdev_set_dl_dst(packet, key, a->dl_addr.dl_addr); ++ xf_netdev_set_dl_dst(packet, a->dl_addr.dl_addr); + break; + + case XFLOWAT_SET_NW_SRC: + case XFLOWAT_SET_NW_DST: + xf_netdev_set_nw_addr(packet, key, &a->nw_addr); + break; + + case XFLOWAT_SET_NW_TOS: + xf_netdev_set_nw_tos(packet, key, &a->nw_tos); + break; + + case XFLOWAT_SET_TP_SRC: + case XFLOWAT_SET_TP_DST: + xf_netdev_set_tp_port(packet, key, &a->tp_port); + break; + } + } + return 0; +} + +const struct xfif_class xfif_netdev_class = { + "netdev", + xf_netdev_run, + xf_netdev_wait, + NULL, /* enumerate */ + xfif_netdev_open, + xfif_netdev_close, + NULL, /* get_all_names */ + xfif_netdev_destroy, + xfif_netdev_get_stats, + xfif_netdev_get_drop_frags, + xfif_netdev_set_drop_frags, + xfif_netdev_port_add, + xfif_netdev_port_del, + xfif_netdev_port_query_by_number, + xfif_netdev_port_query_by_name, + xfif_netdev_port_list, + xfif_netdev_port_poll, + xfif_netdev_port_poll_wait, + xfif_netdev_port_group_get, + xfif_netdev_port_group_set, + xfif_netdev_flow_get, + xfif_netdev_flow_put, + xfif_netdev_flow_del, + xfif_netdev_flow_flush, + xfif_netdev_flow_list, + xfif_netdev_execute, + xfif_netdev_recv_get_mask, + xfif_netdev_recv_set_mask, + NULL, /* get_sflow_probability */ + NULL, /* set_sflow_probability */ + xfif_netdev_recv, + xfif_netdev_recv_wait, +}; diff --cc lib/xfif-provider.h index 2c7720b5e,b2f9d4bd1..8eceba038 --- a/lib/xfif-provider.h +++ b/lib/xfif-provider.h @@@ -302,12 -300,18 +302,18 @@@ struct xfif_class 
* 'probability' is expressed as the number of packets out of UINT_MAX to * sample, e.g. probability/UINT_MAX is the probability of sampling a given * packet. */ - int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability); + int (*set_sflow_probability)(struct xfif *xfif, uint32_t probability); + /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a - * priority value for use in the ODPAT_SET_PRIORITY action in ++ * priority value for use in the XFLOWAT_SET_PRIORITY action in + * '*priority'. */ - int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id, ++ int (*queue_to_priority)(const struct xfif *xfif, uint32_t queue_id, + uint32_t *priority); + - /* Attempts to receive a message from 'dpif'. If successful, stores the + /* Attempts to receive a message from 'xfif'. If successful, stores the * message into '*packetp'. The message, if one is received, must begin - * with 'struct odp_msg' as a header, and must have at least - * DPIF_RECV_MSG_PADDING bytes of headroom (allocated using + * with 'struct xflow_msg' as a header, and must have at least + * XFIF_RECV_MSG_PADDING bytes of headroom (allocated using * e.g. ofpbuf_reserve()). Only messages of the types selected with the * set_listen_mask member function should be received. 
* diff --cc lib/xfif.c index 91e894924,40741a45e..4eda1dffb --- a/lib/xfif.c +++ b/lib/xfif.c @@@ -38,22 -37,22 +38,22 @@@ #include "svec.h" #include "util.h" #include "valgrind.h" - #include "vlog.h" - #define THIS_MODULE VLM_xfif + -VLOG_DEFINE_THIS_MODULE(dpif) ++VLOG_DEFINE_THIS_MODULE(xfif) -static const struct dpif_class *base_dpif_classes[] = { +static const struct xfif_class *base_xfif_classes[] = { #ifdef HAVE_NETLINK - &dpif_linux_class, + &xfif_linux_class, #endif - &dpif_netdev_class, + &xfif_netdev_class, }; -struct registered_dpif_class { - struct dpif_class dpif_class; +struct registered_xfif_class { + struct xfif_class xfif_class; int refcount; }; -static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes); +static struct shash xfif_classes = SHASH_INITIALIZER(&xfif_classes); /* Rate limit for individual messages going to or from the datapath, output at * DBG level. This is very high because, if these are enabled, it is because @@@ -1080,26 -1079,45 +1080,45 @@@ xfif_recv_purge(struct xfif *xfif return 0; } -/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be - * received with dpif_recv(). */ +/* Arranges for the poll loop to wake up when 'xfif' has a message queued to be + * received with xfif_recv(). */ void -dpif_recv_wait(struct dpif *dpif) +xfif_recv_wait(struct xfif *xfif) { - dpif->dpif_class->recv_wait(dpif); + xfif->xfif_class->recv_wait(xfif); } -/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type' +/* Obtains the NetFlow engine type and engine ID for 'xfif' into '*engine_type' * and '*engine_id', respectively. 
*/ void -dpif_get_netflow_ids(const struct dpif *dpif, +xfif_get_netflow_ids(const struct xfif *xfif, uint8_t *engine_type, uint8_t *engine_id) { - *engine_type = dpif->netflow_engine_type; - *engine_id = dpif->netflow_engine_id; + *engine_type = xfif->netflow_engine_type; + *engine_id = xfif->netflow_engine_id; } + + /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority + * value for use in the ODPAT_SET_PRIORITY action. On success, returns 0 and + * stores the priority into '*priority'. On failure, returns a positive errno + * value and stores 0 into '*priority'. */ + int + dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, + uint32_t *priority) + { + int error = (dpif->dpif_class->queue_to_priority + ? dpif->dpif_class->queue_to_priority(dpif, queue_id, + priority) + : EOPNOTSUPP); + if (error) { + *priority = 0; + } + log_operation(dpif, "queue_to_priority", error); + return error; + } void -dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, +xfif_init(struct xfif *xfif, const struct xfif_class *xfif_class, const char *name, uint8_t netflow_engine_type, uint8_t netflow_engine_id) { diff --cc lib/xfif.h index 4059c8c43,000000000..05115f103 mode 100644,000000..100644 --- a/lib/xfif.h +++ b/lib/xfif.h @@@ -1,108 -1,0 +1,111 @@@ +/* + * Copyright (c) 2008, 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#ifndef XFIF_H +#define XFIF_H 1 + +#include "openvswitch/xflow.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct xfif; +struct ofpbuf; +struct svec; +struct xfif_class; + +void xf_run(void); +void xf_wait(void); + +int xf_register_provider(const struct xfif_class *); +int xf_unregister_provider(const char *type); +void xf_enumerate_types(struct svec *types); + +int xf_enumerate_names(const char *type, struct svec *names); +void xf_parse_name(const char *datapath_name, char **name, char **type); + +int xfif_open(const char *name, const char *type, struct xfif **); +int xfif_create(const char *name, const char *type, struct xfif **); +int xfif_create_and_open(const char *name, const char *type, struct xfif **); +void xfif_close(struct xfif *); + +const char *xfif_name(const struct xfif *); +const char *xfif_base_name(const struct xfif *); +int xfif_get_all_names(const struct xfif *, struct svec *); + +int xfif_delete(struct xfif *); + +int xfif_get_xf_stats(const struct xfif *, struct xflow_stats *); +int xfif_get_drop_frags(const struct xfif *, bool *drop_frags); +int xfif_set_drop_frags(struct xfif *, bool drop_frags); + +int xfif_port_add(struct xfif *, const char *devname, uint16_t flags, + uint16_t *port_no); +int xfif_port_del(struct xfif *, uint16_t port_no); +int xfif_port_query_by_number(const struct xfif *, uint16_t port_no, + struct xflow_port *); +int xfif_port_query_by_name(const struct xfif *, const char *devname, + struct xflow_port *); +int xfif_port_get_name(struct xfif *, uint16_t port_no, + char *name, size_t name_size); +int xfif_port_list(const struct xfif *, struct xflow_port **, size_t *n_ports); + +int xfif_port_poll(const struct xfif *, char **devnamep); +void xfif_port_poll_wait(const struct xfif *); + +int xfif_port_group_get(const struct xfif *, uint16_t group, + uint16_t **ports, size_t *n_ports); +int xfif_port_group_set(struct xfif *, uint16_t group, + const uint16_t ports[], size_t 
n_ports); + +int xfif_flow_flush(struct xfif *); +int xfif_flow_put(struct xfif *, struct xflow_flow_put *); +int xfif_flow_del(struct xfif *, struct xflow_flow *); +int xfif_flow_get(const struct xfif *, struct xflow_flow *); +int xfif_flow_get_multiple(const struct xfif *, struct xflow_flow[], size_t n); +int xfif_flow_list(const struct xfif *, struct xflow_flow[], size_t n, + size_t *n_out); +int xfif_flow_list_all(const struct xfif *, + struct xflow_flow **flowsp, size_t *np); + +int xfif_execute(struct xfif *, uint16_t in_port, + const union xflow_action[], size_t n_actions, + const struct ofpbuf *); + +int xfif_recv_get_mask(const struct xfif *, int *listen_mask); +int xfif_recv_set_mask(struct xfif *, int listen_mask); +int xfif_get_sflow_probability(const struct xfif *, uint32_t *probability); +int xfif_set_sflow_probability(struct xfif *, uint32_t probability); +int xfif_recv(struct xfif *, struct ofpbuf **); +int xfif_recv_purge(struct xfif *); +void xfif_recv_wait(struct xfif *); + +void xfif_get_netflow_ids(const struct xfif *, + uint8_t *engine_type, uint8_t *engine_id); + ++int xfif_queue_to_priority(const struct xfif *, uint32_t queue_id, ++ uint32_t *priority); ++ +#ifdef __cplusplus +} +#endif + +#endif /* xfif.h */ diff --cc lib/xflow-util.c index a673c386d,000000000..6c7fd8380 mode 100644,000000..100644 --- a/lib/xflow-util.c +++ b/lib/xflow-util.c @@@ -1,204 -1,0 +1,210 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "xflow-util.h" +#include +#include +#include +#include "coverage.h" +#include "dynamic-string.h" +#include "flow.h" +#include "packets.h" +#include "timeval.h" +#include "util.h" + +union xflow_action * +xflow_actions_add(struct xflow_actions *actions, uint16_t type) +{ + union xflow_action *a; + if (actions->n_actions < MAX_XFLOW_ACTIONS) { + a = &actions->actions[actions->n_actions++]; + } else { + COVERAGE_INC(xflow_overflow); + actions->n_actions = MAX_XFLOW_ACTIONS + 1; + a = &actions->actions[MAX_XFLOW_ACTIONS - 1]; + } + memset(a, 0, sizeof *a); + a->type = type; + return a; +} + +void +format_xflow_key(struct ds *ds, const struct xflow_key *key) +{ + ds_put_format(ds, "tunnel%"PRIx32":in_port%04x", + key->tun_id, key->in_port); + if (key->dl_tci) { + ds_put_format(ds, ":vlan%"PRIu16":pcp%d", + vlan_tci_to_vid(key->dl_tci), + vlan_tci_to_pcp(key->dl_tci)); + } + ds_put_format(ds, " mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" type%04x " + "proto%"PRId8" tos%"PRIu8" ip"IP_FMT"->"IP_FMT" port%d->%d", + ETH_ADDR_ARGS(key->dl_src), ETH_ADDR_ARGS(key->dl_dst), + ntohs(key->dl_type), key->nw_proto, key->nw_tos, + IP_ARGS(&key->nw_src), IP_ARGS(&key->nw_dst), + ntohs(key->tp_src), ntohs(key->tp_dst)); +} + +void +format_xflow_action(struct ds *ds, const union xflow_action *a) +{ + switch (a->type) { + case XFLOWAT_OUTPUT: + ds_put_format(ds, "%"PRIu16, a->output.port); + break; + case XFLOWAT_OUTPUT_GROUP: + ds_put_format(ds, "g%"PRIu16, a->output_group.group); + break; + case XFLOWAT_CONTROLLER: + ds_put_format(ds, "ctl(%"PRIu32")", a->controller.arg); + break; + case XFLOWAT_SET_TUNNEL: + ds_put_format(ds, "set_tunnel(0x%08"PRIx32")", ntohl(a->tunnel.tun_id)); + break; + case XFLOWAT_SET_DL_TCI: + ds_put_format(ds, "set_tci(%04"PRIx16",mask=%04"PRIx16")", + ntohs(a->dl_tci.tci), ntohs(a->dl_tci.mask)); + break; + case XFLOWAT_STRIP_VLAN: + 
ds_put_format(ds, "strip_vlan"); + break; + case XFLOWAT_SET_DL_SRC: + ds_put_format(ds, "set_dl_src("ETH_ADDR_FMT")", + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case XFLOWAT_SET_DL_DST: + ds_put_format(ds, "set_dl_dst("ETH_ADDR_FMT")", + ETH_ADDR_ARGS(a->dl_addr.dl_addr)); + break; + case XFLOWAT_SET_NW_SRC: + ds_put_format(ds, "set_nw_src("IP_FMT")", + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case XFLOWAT_SET_NW_DST: + ds_put_format(ds, "set_nw_dst("IP_FMT")", + IP_ARGS(&a->nw_addr.nw_addr)); + break; + case XFLOWAT_SET_NW_TOS: + ds_put_format(ds, "set_nw_tos(%"PRIu8")", a->nw_tos.nw_tos); + break; + case XFLOWAT_SET_TP_SRC: + ds_put_format(ds, "set_tp_src(%"PRIu16")", ntohs(a->tp_port.tp_port)); + break; + case XFLOWAT_SET_TP_DST: + ds_put_format(ds, "set_tp_dst(%"PRIu16")", ntohs(a->tp_port.tp_port)); + break; ++ case XFLOWAT_SET_PRIORITY: ++ ds_put_format(ds, "set_priority(0x%"PRIx32")", a->priority.priority); ++ break; ++ case XFLOWAT_POP_PRIORITY: ++ ds_put_cstr(ds, "pop_priority"); ++ break; + default: + ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type); + break; + } +} + +void +format_xflow_actions(struct ds *ds, const union xflow_action *actions, + size_t n_actions) +{ + size_t i; + for (i = 0; i < n_actions; i++) { + if (i) { + ds_put_char(ds, ','); + } + format_xflow_action(ds, &actions[i]); + } + if (!n_actions) { + ds_put_cstr(ds, "drop"); + } +} + +void +format_xflow_flow_stats(struct ds *ds, const struct xflow_flow_stats *s) +{ + ds_put_format(ds, "packets:%llu, bytes:%llu, used:", + (unsigned long long int) s->n_packets, + (unsigned long long int) s->n_bytes); + if (s->used_sec) { + long long int used = s->used_sec * 1000 + s->used_nsec / 1000000; + ds_put_format(ds, "%.3fs", (time_msec() - used) / 1000.0); + } else { + ds_put_format(ds, "never"); + } +} + +void +format_xflow_flow(struct ds *ds, const struct xflow_flow *f) +{ + format_xflow_key(ds, &f->key); + ds_put_cstr(ds, ", "); + format_xflow_flow_stats(ds, &f->stats); + 
ds_put_cstr(ds, ", actions:"); + format_xflow_actions(ds, f->actions, f->n_actions); +} + +void +xflow_key_from_flow(struct xflow_key *key, const struct flow *flow) +{ + key->tun_id = flow->tun_id; + key->nw_src = flow->nw_src; + key->nw_dst = flow->nw_dst; + key->in_port = ofp_port_to_xflow_port(flow->in_port); + if (flow->dl_vlan == htons(OFP_VLAN_NONE)) { + key->dl_tci = htons(0); + } else { + uint16_t vid = flow->dl_vlan & htons(VLAN_VID_MASK); + uint16_t pcp = htons((flow->dl_vlan_pcp << VLAN_PCP_SHIFT) + & VLAN_PCP_MASK); + key->dl_tci = vid | pcp | htons(XFLOW_TCI_PRESENT); + } + key->dl_type = flow->dl_type; + key->tp_src = flow->tp_src; + key->tp_dst = flow->tp_dst; + memcpy(key->dl_src, flow->dl_src, ETH_ADDR_LEN); + memcpy(key->dl_dst, flow->dl_dst, ETH_ADDR_LEN); + key->nw_proto = flow->nw_proto; + key->nw_tos = flow->nw_tos; +} + +void +xflow_key_to_flow(const struct xflow_key *key, struct flow *flow) +{ + flow->wildcards = 0; + flow->priority = 0xffff; + flow->tun_id = key->tun_id; + flow->nw_src = key->nw_src; + flow->nw_dst = key->nw_dst; + flow->in_port = xflow_port_to_ofp_port(key->in_port); + if (key->dl_tci) { + flow->dl_vlan = htons(vlan_tci_to_vid(key->dl_tci)); + flow->dl_vlan_pcp = vlan_tci_to_pcp(key->dl_tci); + } else { + flow->dl_vlan = htons(OFP_VLAN_NONE); + flow->dl_vlan_pcp = 0; + } + flow->dl_type = key->dl_type; + flow->tp_src = key->tp_src; + flow->tp_dst = key->tp_dst; + memcpy(flow->dl_src, key->dl_src, ETH_ADDR_LEN); + memcpy(flow->dl_dst, key->dl_dst, ETH_ADDR_LEN); + flow->nw_proto = key->nw_proto; + flow->nw_tos = key->nw_tos; +} diff --cc ofproto/discovery.c index 1a9bb58de,008ad62ca..0fa9a14f0 --- a/ofproto/discovery.c +++ b/ofproto/discovery.c @@@ -30,11 -31,9 +30,11 @@@ #include "packets.h" #include "status.h" #include "stream-ssl.h" + #include "vlog.h" +#include "wdp.h" +#include "xfif.h" - #define THIS_MODULE VLM_discovery - #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(discovery) struct discovery { char *dpif_name; diff 
--cc ofproto/in-band.c index 2a1e0a7e0,44bcd720a..15761b51a --- a/ofproto/in-band.c +++ b/ofproto/in-band.c @@@ -33,10 -35,9 +33,10 @@@ #include "poll-loop.h" #include "status.h" #include "timeval.h" + #include "vlog.h" +#include "wdp.h" - #define THIS_MODULE VLM_in_band - #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(in_band) /* In-band control allows a single network to be used for OpenFlow * traffic and other data traffic. Refer to ovs-vswitchd.conf(5) and diff --cc ofproto/ofproto-sflow.c index a7901ce8d,c74c7360e..7ed61af10 --- a/ofproto/ofproto-sflow.c +++ b/ofproto/ofproto-sflow.c @@@ -30,11 -30,9 +30,11 @@@ #include "sflow_api.h" #include "socket-util.h" #include "timeval.h" + #include "vlog.h" +#include "wdp.h" +#include "xfif.h" - #define THIS_MODULE VLM_sflow - #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(sflow) struct ofproto_sflow_port { struct netdev *netdev; /* Underlying network device, for stats. */ diff --cc ofproto/ofproto.c index 2fe73de5d,66b957e10..ed1f314b9 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@@ -55,12 -57,10 +55,12 @@@ #include "timeval.h" #include "unixctl.h" #include "vconn.h" + #include "vlog.h" +#include "wdp.h" +#include "xfif.h" #include "xtoxll.h" - #define THIS_MODULE VLM_ofproto - #include "vlog.h" + VLOG_DEFINE_THIS_MODULE(ofproto) #include "sflow_api.h" diff --cc ofproto/wdp-xflow.c index 42655f239,000000000..49dd2ffb4 mode 100644,000000..100644 --- a/ofproto/wdp-xflow.c +++ b/ofproto/wdp-xflow.c @@@ -1,2536 -1,0 +1,2549 @@@ +/* + * Copyright (c) 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "wdp-xflow.h" + +#include +#include + +#include "coverage.h" +#include "dhcp.h" +#include "mac-learning.h" +#include "netdev.h" +#include "netflow.h" +#include "ofp-util.h" +#include "ofpbuf.h" +#include "ofproto.h" +#include "openflow/nicira-ext.h" +#include "openflow/openflow.h" +#include "packets.h" +#include "poll-loop.h" +#include "port-array.h" +#include "shash.h" +#include "stp.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "vconn.h" +#include "wdp-provider.h" +#include "xfif.h" +#include "xflow-util.h" ++#include "vlog.h" +#include "xtoxll.h" + - #include /* XXX */ - #include /* XXX */ - - #define THIS_MODULE VLM_wdp_xflow - #include "vlog.h" ++VLOG_DEFINE_THIS_MODULE(wdp_xflow) + +enum { + TABLEID_HASH = 0, + TABLEID_CLASSIFIER = 1 +}; + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +/* Maximum numbers of rules. */ +#define WX_MAX_WILD 65536 /* Wildcarded rules. */ +#define WX_MAX_EXACT 1048576 /* Exact-match rules. */ + +struct wx { + struct list list_node; + struct wdp wdp; + struct xfif *xfif; + struct classifier cls; + struct netdev_monitor *netdev_monitor; + struct port_array ports; /* Index is xflow port nr; + * wdp_port->opp.port_no is OFP port nr. */ + struct shash port_by_name; + long long int next_expiration; + + /* Rules that might need to be revalidated. */ + bool need_revalidate; /* Revalidate all subrules? */ + bool revalidate_all; /* Revalidate all subrules and other rules? */ + struct tag_set revalidate_set; /* Tag set of (sub)rules to revalidate. */ + + /* Hooks for ovs-vswitchd. */ + const struct ofhooks *ofhooks; + void *aux; + + /* Used by default ofhooks. 
*/ + struct mac_learning *ml; +}; + +static const struct ofhooks default_ofhooks; + +static struct list all_wx = LIST_INITIALIZER(&all_wx); + +static int wx_port_init(struct wx *); +static void wx_port_process_change(struct wx *wx, int error, char *devname, + wdp_port_poll_cb_func *cb, void *aux); +static void wx_port_refresh_groups(struct wx *); + +enum { + WX_GROUP_FLOOD = 0, + WX_GROUP_ALL = 1 +}; + +static struct wx * +wx_cast(const struct wdp *wdp) +{ + return CONTAINER_OF(wdp, struct wx, wdp); +} + +static int +wx_xlate_actions(struct wx *, const union ofp_action *, size_t n, + const flow_t *flow, const struct ofpbuf *packet, + tag_type *tags, struct xflow_actions *out, + bool *may_set_up_flow); + +struct wx_rule { + struct wdp_rule wr; + + uint64_t packet_count; /* Number of packets received. */ + uint64_t byte_count; /* Number of bytes received. */ + uint64_t accounted_bytes; /* Number of bytes passed to account_cb. */ + long long int used; /* Last-used time (0 if never used). */ + tag_type tags; /* Tags (set only by hooks). */ + + /* If 'super' is non-NULL, this rule is a subrule, that is, it is an + * exact-match rule (having cr.wc.wildcards of 0) generated from the + * wildcard rule 'super'. In this case, 'list' is an element of the + * super-rule's list. + * + * If 'super' is NULL, this rule is a super-rule, and 'list' is the head of + * a list of subrules. A super-rule with no wildcards (where + * cr.wc.wildcards is 0) will never have any subrules. */ + struct wx_rule *super; + struct list list; + + /* Datapath actions. + * + * A super-rule with wildcard fields never has xflow actions (since the + * datapath only supports exact-match flows). */ + bool installed; /* Installed in datapath? */ + bool may_install; /* True ordinarily; false if actions must + * be reassessed for every packet. 
*/ + int n_xflow_actions; + union xflow_action *xflow_actions; +}; + +static void wx_rule_destroy(struct wx *, struct wx_rule *); +static void wx_rule_update_actions(struct wx *, struct wx_rule *); +static void wx_rule_execute(struct wx *, struct wx_rule *, + struct ofpbuf *packet, const flow_t *); +static bool wx_rule_make_actions(struct wx *, struct wx_rule *, + const struct ofpbuf *packet); +static void wx_rule_install(struct wx *, struct wx_rule *, + struct wx_rule *displaced_rule); + +static struct wx_rule * +wx_rule_cast(const struct cls_rule *cls_rule) +{ + return cls_rule ? CONTAINER_OF(cls_rule, struct wx_rule, wr.cr) : NULL; +} + +/* Returns true if 'rule' is merely an implementation detail that should be + * hidden from the client. */ +static inline bool +wx_rule_is_hidden(const struct wx_rule *rule) +{ + return rule->super != NULL; +} + +static void +wx_rule_free(struct wx_rule *rule) +{ + wdp_rule_uninit(&rule->wr); + free(rule->xflow_actions); + free(rule); +} + +static void +wx_rule_account(struct wx *wx OVS_UNUSED, struct wx_rule *rule OVS_UNUSED, + uint64_t extra_bytes OVS_UNUSED) +{ + /* XXX call account_cb hook */ +} + +static void +wx_rule_post_uninstall(struct wx *wx, struct wx_rule *rule) +{ + struct wx_rule *super = rule->super; + + wx_rule_account(wx, rule, 0); + + /* XXX netflow expiration */ + + if (super) { + super->packet_count += rule->packet_count; + super->byte_count += rule->byte_count; + + /* Reset counters to prevent double counting if the rule ever gets + * reinstalled. */ + rule->packet_count = 0; + rule->byte_count = 0; + rule->accounted_bytes = 0; + + //XXX netflow_flow_clear(&rule->nf_flow); + } +} + +static long long int +xflow_flow_stats_to_msec(const struct xflow_flow_stats *stats) +{ + return (stats->used_sec + ? 
stats->used_sec * 1000 + stats->used_nsec / 1000000 + : 0); +} + +static void +wx_rule_update_time(struct wx *wx OVS_UNUSED, struct wx_rule *rule, + const struct xflow_flow_stats *stats) +{ + long long int used = xflow_flow_stats_to_msec(stats); + if (used > rule->used) { + rule->used = used; + if (rule->super && used > rule->super->used) { + rule->super->used = used; + } + //XXX netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used); + } +} + +static void +wx_rule_update_stats(struct wx *wx, struct wx_rule *rule, + const struct xflow_flow_stats *stats) +{ + if (stats->n_packets) { + wx_rule_update_time(wx, rule, stats); + rule->packet_count += stats->n_packets; + rule->byte_count += stats->n_bytes; + /* XXX netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos, + stats->tcp_flags); */ + } +} + +static void +wx_rule_uninstall(struct wx *wx, struct wx_rule *rule) +{ + assert(!rule->wr.cr.flow.wildcards); + if (rule->installed) { + struct xflow_flow xflow_flow; + + xflow_key_from_flow(&xflow_flow.key, &rule->wr.cr.flow); + xflow_flow.actions = NULL; + xflow_flow.n_actions = 0; + xflow_flow.flags = 0; + if (!xfif_flow_del(wx->xfif, &xflow_flow)) { + wx_rule_update_stats(wx, rule, &xflow_flow.stats); + } + rule->installed = false; + + wx_rule_post_uninstall(wx, rule); + } +} + +#if 0 +static bool +is_controller_rule(struct wx_rule *rule) +{ + /* If the only action is send to the controller then don't report + * NetFlow expiration messages since it is just part of the control + * logic for the network and not real traffic. 
*/ + + return (rule + && rule->super + && rule->super->n_actions == 1 + && action_outputs_to_port(&rule->super->actions[0], + htons(OFPP_CONTROLLER))); +} +#endif + +static void +wx_rule_remove(struct wx *wx, struct wx_rule *rule) +{ + if (rule->wr.cr.flow.wildcards) { + COVERAGE_INC(wx_del_wc_flow); + wx->need_revalidate = true; + } else { + wx_rule_uninstall(wx, rule); + } + classifier_remove(&wx->cls, &rule->wr.cr); + wx_rule_destroy(wx, rule); +} + +static bool +wx_rule_revalidate(struct wx *wx, struct wx_rule *rule) +{ + const flow_t *flow = &rule->wr.cr.flow; + + COVERAGE_INC(wx_rule_revalidate); + if (rule->super) { + struct wx_rule *super; + super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow)); + if (!super) { + wx_rule_remove(wx, rule); + return false; + } else if (super != rule->super) { + COVERAGE_INC(wx_revalidate_moved); + list_remove(&rule->list); + list_push_back(&super->list, &rule->list); + rule->super = super; + rule->wr.hard_timeout = super->wr.hard_timeout; + rule->wr.idle_timeout = super->wr.idle_timeout; + rule->wr.created = super->wr.created; + rule->used = 0; + } + } + + wx_rule_update_actions(wx, rule); + return true; +} + +/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its + * super-rule's list of subrules. If 'rule' is a super-rule, also iterates + * through all of its subrules and revalidates them, destroying any that no + * longer have a super-rule (which is probably all of them). + * + * Before calling this function, the caller must have removed 'rule' from + * the classifier. If 'rule' is an exact-match rule, the caller is also + * responsible for ensuring that it has been uninstalled from the datapath.
*/ +static void +wx_rule_destroy(struct wx *wx, struct wx_rule *rule) +{ + if (!rule->super) { + struct wx_rule *subrule, *next; + LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) { + wx_rule_revalidate(wx, subrule); + } + } else { + list_remove(&rule->list); + } + wx_rule_free(rule); +} + +#if 0 +static bool +wx_rule_has_out_port(const struct wx_rule *rule, uint16_t out_port) +{ + const union ofp_action *oa; + struct actions_iterator i; + + if (out_port == htons(OFPP_NONE)) { + return true; + } + for (oa = actions_first(&i, rule->wr.actions, + rule->wr.n_actions); + oa; + oa = actions_next(&i)) { + if (oa->type == htons(OFPAT_OUTPUT) && oa->output.port == out_port) { + return true; + } + } + return false; +} +#endif + +/* Caller is responsible for initializing the 'cr' and ofp_table_id members of + * the returned rule. */ +static struct wx_rule * +wx_rule_create(struct wx_rule *super, + const union ofp_action *actions, size_t n_actions, + uint16_t idle_timeout, uint16_t hard_timeout) +{ + struct wx_rule *rule = xzalloc(sizeof *rule); + wdp_rule_init(&rule->wr, actions, n_actions); + rule->wr.idle_timeout = idle_timeout; + rule->wr.hard_timeout = hard_timeout; + rule->used = rule->wr.created; + rule->super = super; + if (super) { + list_push_back(&super->list, &rule->list); + } else { + list_init(&rule->list); + } +#if 0 + netflow_flow_clear(&rule->nf_flow); + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created); +#endif + + return rule; +} + +/* Executes the actions indicated by 'rule' on 'packet', which is in flow + * 'flow' and is considered to have arrived on port 'flow->in_port'. + * + * The flow that 'packet' actually contains does not need to actually match + * 'rule'; the actions in 'rule' will be applied to it either way. Likewise, + * the packet and byte counters for 'rule' will be credited for the packet sent + * out whether or not the packet actually matches 'rule'.
+ * + * If 'rule' is an exact-match rule and 'flow' actually equals the rule's flow, + * the caller must already have accurately composed xflow actions for it given + * 'packet' using wx_rule_make_actions(). If 'rule' is a wildcard rule, or if + * 'rule' is an exact-match rule but 'flow' is not the rule's flow, then this + * function will compose a set of xflow actions based on 'rule''s OpenFlow + * actions and apply them to 'packet'. */ +static void +wx_rule_execute(struct wx *wx, struct wx_rule *rule, + struct ofpbuf *packet, const flow_t *flow) +{ + const union xflow_action *actions; + size_t n_actions; + struct xflow_actions a; + + /* Grab or compose the xflow actions. + * + * The special case for an exact-match 'rule' where 'flow' is not the + * rule's flow is important to avoid, e.g., sending a packet out its input + * port simply because the xflow actions were composed for the wrong + * scenario. */ + if (rule->wr.cr.flow.wildcards + || !flow_equal(flow, &rule->wr.cr.flow)) + { + struct wx_rule *super = rule->super ? rule->super : rule; + if (wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, flow, + packet, NULL, &a, NULL)) { + return; + } + actions = a.actions; + n_actions = a.n_actions; + } else { + actions = rule->xflow_actions; + n_actions = rule->n_xflow_actions; + } + + /* Execute the xflow actions. */ + if (!xfif_execute(wx->xfif, flow->in_port, + actions, n_actions, packet)) { + struct xflow_flow_stats stats; + flow_extract_stats(flow, packet, &stats); + wx_rule_update_stats(wx, rule, &stats); + rule->used = time_msec(); + //XXX netflow_flow_update_time(wx->netflow, &rule->nf_flow, rule->used); + } +} + +static void +wx_rule_insert(struct wx *wx, struct wx_rule *rule, struct ofpbuf *packet, + uint16_t in_port) +{ + struct wx_rule *displaced_rule; + + /* Insert the rule in the classifier.
*/ + displaced_rule = wx_rule_cast(classifier_insert(&wx->cls, &rule->wr.cr)); + if (!rule->wr.cr.flow.wildcards) { + wx_rule_make_actions(wx, rule, packet); + } + + /* Send the packet and credit it to the rule. */ + if (packet) { + flow_t flow; + flow_extract(packet, 0, in_port, &flow); + wx_rule_execute(wx, rule, packet, &flow); + } + + /* Install the rule in the datapath only after sending the packet, to + * avoid packet reordering. */ + if (rule->wr.cr.flow.wildcards) { + COVERAGE_INC(wx_add_wc_flow); + wx->need_revalidate = true; + } else { + wx_rule_install(wx, rule, displaced_rule); + } + + /* Free the rule that was displaced, if any. */ + if (displaced_rule) { + rule->wr.client_data = displaced_rule->wr.client_data; + wx_rule_destroy(wx, displaced_rule); + } +} + +static struct wx_rule * +wx_rule_create_subrule(struct wx *wx, struct wx_rule *rule, const flow_t *flow) +{ + struct wx_rule *subrule; + + subrule = wx_rule_create(rule, NULL, 0, + rule->wr.idle_timeout, + rule->wr.hard_timeout); + /* Subrules aren't really in any OpenFlow table, so don't bother with + * subrule->wr.ofp_table_id. */ + COVERAGE_INC(wx_subrule_create); + cls_rule_from_flow(flow, &subrule->wr.cr); + classifier_insert_exact(&wx->cls, &subrule->wr.cr); + + return subrule; +} + +/* Returns true if the actions changed, false otherwise. */ +static bool +wx_rule_make_actions(struct wx *wx, struct wx_rule *rule, + const struct ofpbuf *packet) +{ + const struct wx_rule *super; + struct xflow_actions a; + size_t actions_len; + + assert(!rule->wr.cr.flow.wildcards); + + super = rule->super ? 
rule->super : rule; + wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, + &rule->wr.cr.flow, packet, + &rule->tags, &a, &rule->may_install); + + actions_len = a.n_actions * sizeof *a.actions; + if (rule->n_xflow_actions != a.n_actions + || memcmp(rule->xflow_actions, a.actions, actions_len)) { + COVERAGE_INC(wx_xflow_unchanged); + free(rule->xflow_actions); + rule->n_xflow_actions = a.n_actions; + rule->xflow_actions = xmemdup(a.actions, actions_len); + return true; + } else { + return false; + } +} + +static int +do_put_flow(struct wx *wx, struct wx_rule *rule, int flags, + struct xflow_flow_put *put) +{ + memset(&put->flow.stats, 0, sizeof put->flow.stats); + xflow_key_from_flow(&put->flow.key, &rule->wr.cr.flow); + put->flow.actions = rule->xflow_actions; + put->flow.n_actions = rule->n_xflow_actions; + put->flow.flags = 0; + put->flags = flags; + return xfif_flow_put(wx->xfif, put); +} + +static void +wx_rule_install(struct wx *wx, struct wx_rule *rule, struct wx_rule *displaced_rule) +{ + assert(!rule->wr.cr.flow.wildcards); + + if (rule->may_install) { + struct xflow_flow_put put; + if (!do_put_flow(wx, rule, + XFLOWPF_CREATE | XFLOWPF_MODIFY | XFLOWPF_ZERO_STATS, + &put)) { + rule->installed = true; + if (displaced_rule) { + wx_rule_update_stats(wx, displaced_rule, &put.flow.stats); + wx_rule_post_uninstall(wx, displaced_rule); + } + } + } else if (displaced_rule) { + wx_rule_uninstall(wx, displaced_rule); + } +} + +static void +wx_rule_reinstall(struct wx *wx, struct wx_rule *rule) +{ + if (rule->installed) { + struct xflow_flow_put put; + COVERAGE_INC(wx_dp_missed); + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY, &put); + } else { + wx_rule_install(wx, rule, NULL); + } +} + +static void +wx_rule_update_actions(struct wx *wx, struct wx_rule *rule) +{ + bool actions_changed; +#if 0 + uint16_t new_out_iface, old_out_iface; + + old_out_iface = rule->nf_flow.output_iface; +#endif + actions_changed = wx_rule_make_actions(wx, rule, NULL); + + 
if (rule->may_install) { + if (rule->installed) { + if (actions_changed) { + struct xflow_flow_put put; + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY + | XFLOWPF_ZERO_STATS, &put); + wx_rule_update_stats(wx, rule, &put.flow.stats); +#if 0 + /* Temporarily set the old output iface so that NetFlow + * messages have the correct output interface for the old + * stats. */ + new_out_iface = rule->nf_flow.output_iface; + rule->nf_flow.output_iface = old_out_iface; +#endif + wx_rule_post_uninstall(wx, rule); + //rule->nf_flow.output_iface = new_out_iface; + } + } else { + wx_rule_install(wx, rule, NULL); + } + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +add_output_group_action(struct xflow_actions *actions, uint16_t group, + uint16_t *nf_output_iface) +{ + xflow_actions_add(actions, XFLOWAT_OUTPUT_GROUP)->output_group.group = group; + + if (group == WX_GROUP_ALL || group == WX_GROUP_FLOOD) { + *nf_output_iface = NF_OUT_FLOOD; + } +} + +static void - add_controller_action(struct xflow_actions *actions, - const struct ofp_action_output *oao) ++add_controller_action(struct xflow_actions *actions, uint16_t max_len) +{ + union xflow_action *a = xflow_actions_add(actions, XFLOWAT_CONTROLLER); - a->controller.arg = ntohs(oao->max_len); ++ a->controller.arg = max_len; +} + +struct wx_xlate_ctx { + /* Input. */ + flow_t flow; /* Flow to which these actions correspond. */ + int recurse; /* Recursion level, via xlate_table_action. */ + struct wx *wx; + const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a + * null pointer if we are revalidating + * without a packet to refer to. */ + + /* Output. */ + struct xflow_actions *out; /* Datapath actions. */ + tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ + bool may_set_up_flow; /* True ordinarily; false if the actions must + * be reassessed for every packet. */ + uint16_t nf_output_iface; /* Output interface index for NetFlow. 
*/ +}; + +static void do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx); + +static void +add_output_action(struct wx_xlate_ctx *ctx, uint16_t port) +{ + const struct wdp_port *wdp_port = port_array_get(&ctx->wx->ports, port); + + if (wdp_port) { + if (wdp_port->opp.config & OFPPC_NO_FWD) { + /* Forwarding disabled on port. */ + return; + } + } else { + /* + * We don't have an ofport record for this port, but it doesn't hurt to + * allow forwarding to it anyhow. Maybe such a port will appear later + * and we're pre-populating the flow table. + */ + } + + xflow_actions_add(ctx->out, XFLOWAT_OUTPUT)->output.port = port; + //ctx->nf_output_iface = port; +} + +static struct wx_rule * +wx_rule_lookup_valid(struct wx *wx, const flow_t *flow) +{ + struct wx_rule *rule = wx_rule_cast(classifier_lookup(&wx->cls, flow)); + + /* The rule we found might not be valid, since we could be in need of + * revalidation. If it is not valid, don't return it. */ + if (rule + && rule->super + && wx->need_revalidate + && !wx_rule_revalidate(wx, rule)) { + COVERAGE_INC(wx_invalidated); + return NULL; + } + + return rule; +} + +static void +xlate_table_action(struct wx_xlate_ctx *ctx, uint16_t in_port) +{ + if (!ctx->recurse) { + uint16_t old_in_port; + struct wx_rule *rule; + + /* Look up a flow with 'in_port' as the input port. Then restore the + * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will + * have surprising behavior). 
*/ + old_in_port = ctx->flow.in_port; + ctx->flow.in_port = in_port; + rule = wx_rule_lookup_valid(ctx->wx, &ctx->flow); + ctx->flow.in_port = old_in_port; + + if (rule) { + if (rule->super) { + rule = rule->super; + } + + ctx->recurse++; + do_xlate_actions(rule->wr.actions, rule->wr.n_actions, ctx); + ctx->recurse--; + } + } +} + +static void - xlate_output_action(struct wx_xlate_ctx *ctx, - const struct ofp_action_output *oao) ++xlate_output_action__(struct wx_xlate_ctx *ctx, ++ uint16_t port, uint16_t max_len) +{ + uint16_t xflow_port; + uint16_t prev_nf_output_iface = ctx->nf_output_iface; + + ctx->nf_output_iface = NF_OUT_DROP; + - switch (ntohs(oao->port)) { ++ switch (port) { + case OFPP_IN_PORT: + add_output_action(ctx, ctx->flow.in_port); + break; + case OFPP_TABLE: + xlate_table_action(ctx, ctx->flow.in_port); + break; + case OFPP_NORMAL: + if (!ctx->wx->ofhooks->normal_cb(&ctx->flow, ctx->packet, + ctx->out, ctx->tags, + &ctx->nf_output_iface, + ctx->wx->aux)) { + COVERAGE_INC(wx_uninstallable); + ctx->may_set_up_flow = false; + } + break; + + case OFPP_FLOOD: + add_output_group_action(ctx->out, WX_GROUP_FLOOD, + &ctx->nf_output_iface); + break; + case OFPP_ALL: + add_output_group_action(ctx->out, WX_GROUP_ALL, &ctx->nf_output_iface); + break; + case OFPP_CONTROLLER: - add_controller_action(ctx->out, oao); ++ add_controller_action(ctx->out, max_len); + break; + case OFPP_LOCAL: + add_output_action(ctx, XFLOWP_LOCAL); + break; + default: - xflow_port = ofp_port_to_xflow_port(ntohs(oao->port)); ++ xflow_port = ofp_port_to_xflow_port(port); + if (xflow_port != ctx->flow.in_port) { + add_output_action(ctx, xflow_port); + } + break; + } + + if (prev_nf_output_iface == NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_FLOOD; + } else if (ctx->nf_output_iface == NF_OUT_DROP) { + ctx->nf_output_iface = prev_nf_output_iface; + } else if (prev_nf_output_iface != NF_OUT_DROP && + ctx->nf_output_iface != NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_MULTI; + } +} + 
++/* Translates the OpenFlow output action 'oao' into xflow actions in 'ctx'.
++ * This is a thin wrapper that applies ntohs() to the 'port' and 'max_len'
++ * members of 'oao' and passes the results to xlate_output_action__().
++ *
++ * 'ctx' must be a struct wx_xlate_ctx, the context type that
++ * xlate_output_action__() accepts. */
++static void
++xlate_output_action(struct wx_xlate_ctx *ctx,
++                    const struct ofp_action_output *oao)
++{
++    xlate_output_action__(ctx, ntohs(oao->port), ntohs(oao->max_len));
++}
++
 +/* If the final xflow action in 'ctx' is "pop priority", drop it, as an
 + * optimization, because we're going to add another action that sets the
 + * priority immediately after, or because there are no actions following the
 + * pop. */
 +static void
 +remove_pop_action(struct wx_xlate_ctx *ctx)
 +{
 +    size_t n = ctx->out->n_actions;
 +    if (n > 0 && ctx->out->actions[n - 1].type == XFLOWAT_POP_PRIORITY) {
 +        ctx->out->n_actions--;
 +    }
 +}
 +
++/* Translates the OpenFlow enqueue action 'oae' into xflow actions in 'ctx'.
++ *
++ * The queue ID in 'oae' is converted with ntohl() and handed to
++ * xfif_queue_to_priority().  If that call reports an error, the action
++ * degrades to a plain output to 'oae->port' with a max_len of 0.  Otherwise
++ * the emitted sequence is "set priority", "output", "pop priority", after
++ * first dropping a trailing "pop priority" left by a previous enqueue. */
 +static void
 +xlate_enqueue_action(struct wx_xlate_ctx *ctx,
 +                     const struct ofp_action_enqueue *oae)
 +{
 +    uint16_t ofp_port, xflow_port;
++    uint32_t priority;
++    int error;
++
++    error = xfif_queue_to_priority(ctx->wx->xfif, ntohl(oae->queue_id),
++                                   &priority);
++    if (error) {
++        /* Fall back to ordinary output action. */
++        xlate_output_action__(ctx, ntohs(oae->port), 0);
++        return;
++    }
 +
 +    /* Figure out xflow output port. */
 +    ofp_port = ntohs(oae->port);
 +    if (ofp_port != OFPP_IN_PORT) {
 +        xflow_port = ofp_port_to_xflow_port(ofp_port);
 +    } else {
 +        xflow_port = ctx->flow.in_port;
 +    }
 +
 +    /* Add xflow actions. */
 +    remove_pop_action(ctx);
 +    xflow_actions_add(ctx->out, XFLOWAT_SET_PRIORITY)->priority.priority
-         = TC_H_MAKE(1, ntohl(oae->queue_id)); /* XXX */
++        = priority;
 +    add_output_action(ctx, xflow_port);
 +    xflow_actions_add(ctx->out, XFLOWAT_POP_PRIORITY);
 +
 +    /* Update NetFlow output port.
*/ + if (ctx->nf_output_iface == NF_OUT_DROP) { + ctx->nf_output_iface = xflow_port; + } else if (ctx->nf_output_iface != NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_MULTI; + } +} + +static void +xlate_nicira_action(struct wx_xlate_ctx *ctx, + const struct nx_action_header *nah) +{ + const struct nx_action_resubmit *nar; + const struct nx_action_set_tunnel *nast; + union xflow_action *oa; + int subtype = ntohs(nah->subtype); + + assert(nah->vendor == htonl(NX_VENDOR_ID)); + switch (subtype) { + case NXAST_RESUBMIT: + nar = (const struct nx_action_resubmit *) nah; + xlate_table_action(ctx, ofp_port_to_xflow_port(ntohs(nar->in_port))); + break; + + case NXAST_SET_TUNNEL: + nast = (const struct nx_action_set_tunnel *) nah; + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TUNNEL); + ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id; + break; + + /* If you add a new action here that modifies flow data, don't forget to + * update the flow key in ctx->flow at the same time. */ + + default: + VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype); + break; + } +} + +static void +do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx) +{ + struct actions_iterator iter; + const union ofp_action *ia; + const struct wdp_port *port; + + port = port_array_get(&ctx->wx->ports, ctx->flow.in_port); + if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && + port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr) + ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) { + /* Drop this flow. 
*/ + return; + } + + for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { + uint16_t type = ntohs(ia->type); + union xflow_action *oa; + + switch (type) { + case OFPAT_OUTPUT: + xlate_output_action(ctx, &ia->output); + break; + + case OFPAT_SET_VLAN_VID: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = ia->vlan_vid.vlan_vid & htons(VLAN_VID_MASK); + oa->dl_tci.mask = htons(VLAN_VID_MASK); + ctx->flow.dl_vlan = ia->vlan_vid.vlan_vid; + break; + + case OFPAT_SET_VLAN_PCP: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = htons((ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) + & VLAN_PCP_MASK); + oa->dl_tci.mask = htons(VLAN_PCP_MASK); + + if (ctx->flow.dl_vlan == htons(OFP_VLAN_NONE)) { + ctx->flow.dl_vlan = htons(0); + } + ctx->flow.dl_vlan_pcp = ia->vlan_pcp.vlan_pcp; + break; + + case OFPAT_STRIP_VLAN: + xflow_actions_add(ctx->out, XFLOWAT_STRIP_VLAN); + ctx->flow.dl_vlan = htons(OFP_VLAN_NONE); + ctx->flow.dl_vlan_pcp = 0; + break; + + case OFPAT_SET_DL_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_SRC); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + memcpy(ctx->flow.dl_src, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_DL_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_DST); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + memcpy(ctx->flow.dl_dst, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_NW_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_SRC); + ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case OFPAT_SET_NW_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_DST); + ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case OFPAT_SET_NW_TOS: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_TOS); + ctx->flow.nw_tos = oa->nw_tos.nw_tos = 
ia->nw_tos.nw_tos; + break; + + case OFPAT_SET_TP_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_SRC); + ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + + case OFPAT_SET_TP_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_DST); + ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + + case OFPAT_ENQUEUE: + xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); + break; + + case OFPAT_VENDOR: + xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + break; + + default: + VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type); + break; + } + } +} + +/* Returns true if 'flow' and 'actions' may be set up as a flow in the kernel. + * This is true most of the time, but we don't allow flows that would prevent + * DHCP replies from being seen by the local port to be set up in the + * kernel. + * + * We only need this, strictly speaking, when in-band control is turned on. */ +static bool +wx_may_set_up(const flow_t *flow, const struct xflow_actions *actions) +{ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { + int i; + + for (i = 0; i < actions->n_actions; i++) { + const struct xflow_action_output *oao = &actions->actions[i].output; + if (oao->type == XFLOWAT_OUTPUT && oao->port == XFLOWP_LOCAL) { + return true; + } + } + return false; + } + + return true; +} + +static int +wx_xlate_actions(struct wx *wx, const union ofp_action *in, size_t n_in, + const flow_t *flow, const struct ofpbuf *packet, + tag_type *tags, struct xflow_actions *out, + bool *may_set_up_flow) +{ + tag_type no_tags = 0; + struct wx_xlate_ctx ctx; + COVERAGE_INC(wx_ofp2xflow); + xflow_actions_init(out); + ctx.flow = *flow; + ctx.recurse = 0; + ctx.wx = wx; + ctx.packet = packet; + ctx.out = out; + ctx.tags = tags ? 
tags : &no_tags; + ctx.may_set_up_flow = true; + ctx.nf_output_iface = NF_OUT_DROP; + do_xlate_actions(in, n_in, &ctx); + remove_pop_action(&ctx); + + if (may_set_up_flow) { + *may_set_up_flow = ctx.may_set_up_flow && wx_may_set_up(flow, out); + } +#if 0 + if (nf_output_iface) { + *nf_output_iface = ctx.nf_output_iface; + } +#endif + if (xflow_actions_overflow(out)) { + xflow_actions_init(out); + return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY); + } + return 0; +} + +static void +update_used(struct wx *wx) +{ + struct xflow_flow *flows; + size_t n_flows; + size_t i; + int error; + + error = xfif_flow_list_all(wx->xfif, &flows, &n_flows); + if (error) { + return; + } + + for (i = 0; i < n_flows; i++) { + struct xflow_flow *f = &flows[i]; + struct wx_rule *rule; + flow_t flow; + + xflow_key_to_flow(&f->key, &flow); + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, &flow)); + if (!rule || !rule->installed) { + COVERAGE_INC(wx_unexpected_rule); + xfif_flow_del(wx->xfif, f); + continue; + } + + wx_rule_update_time(wx, rule, &f->stats); + wx_rule_account(wx, rule, f->stats.n_bytes); + } + free(flows); +} + +static void +uninstall_idle_flow(struct wx *wx, struct wx_rule *rule) +{ + assert(rule->installed); + assert(!rule->wr.cr.flow.wildcards); + + if (rule->super) { + wx_rule_remove(wx, rule); + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +expire_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx *wx = wx_; + struct wx_rule *rule = wx_rule_cast(cls_rule); + long long int hard_expire, idle_expire, expire, now; + + hard_expire = (rule->wr.hard_timeout + ? rule->wr.created + rule->wr.hard_timeout * 1000 + : LLONG_MAX); + idle_expire = (rule->wr.idle_timeout + && (rule->super || list_is_empty(&rule->list)) + ? 
rule->used + rule->wr.idle_timeout * 1000 + : LLONG_MAX); + expire = MIN(hard_expire, idle_expire); + + now = time_msec(); + if (now < expire) { + if (rule->installed && now >= rule->used + 5000) { + uninstall_idle_flow(wx, rule); + } else if (!rule->wr.cr.flow.wildcards) { + //XXX active_timeout(wx, rule); + } + + return; + } + + COVERAGE_INC(wx_expired); + + /* Update stats. This code will be a no-op if the rule expired + * due to an idle timeout. */ + if (rule->wr.cr.flow.wildcards) { + struct wx_rule *subrule, *next; + LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) { + wx_rule_remove(wx, subrule); + } + } else { + wx_rule_uninstall(wx, rule); + } + +#if 0 /* XXX */ + if (!wx_rule_is_hidden(rule)) { + send_flow_removed(wx, rule, now, + (now >= hard_expire + ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT)); + } +#endif + wx_rule_remove(wx, rule); +} + +struct revalidate_cbdata { + struct wx *wx; + bool revalidate_all; /* Revalidate all exact-match rules? */ + bool revalidate_subrules; /* Revalidate all exact-match subrules? */ + struct tag_set revalidate_set; /* Set of tags to revalidate. 
*/ +}; + +static bool +revalidate_rule(struct wx *wx, struct wx_rule *rule) +{ + const flow_t *flow = &rule->wr.cr.flow; + + COVERAGE_INC(wx_revalidate_rule); + if (rule->super) { + struct wx_rule *super; + super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow)); + if (!super) { + wx_rule_remove(wx, rule); + return false; + } else if (super != rule->super) { + COVERAGE_INC(wx_revalidate_moved); + list_remove(&rule->list); + list_push_back(&super->list, &rule->list); + rule->super = super; + rule->wr.hard_timeout = super->wr.hard_timeout; + rule->wr.idle_timeout = super->wr.idle_timeout; + rule->wr.created = super->wr.created; + rule->used = 0; + } + } + + wx_rule_update_actions(wx, rule); + return true; +} + +static void +revalidate_cb(struct cls_rule *sub_, void *cbdata_) +{ + struct wx_rule *sub = wx_rule_cast(sub_); + struct revalidate_cbdata *cbdata = cbdata_; + + if (cbdata->revalidate_all + || (cbdata->revalidate_subrules && sub->super) + || tag_set_intersects(&cbdata->revalidate_set, sub->tags)) { + revalidate_rule(cbdata->wx, sub); + } +} + +static void +wx_run_one(struct wx *wx) +{ + if (time_msec() >= wx->next_expiration) { + COVERAGE_INC(wx_expiration); + wx->next_expiration = time_msec() + 1000; + update_used(wx); + + classifier_for_each(&wx->cls, CLS_INC_ALL, expire_rule, wx); + + /* XXX account_checkpoint_cb */ + } + + if (wx->need_revalidate || !tag_set_is_empty(&wx->revalidate_set)) { + struct revalidate_cbdata cbdata; + cbdata.wx = wx; + cbdata.revalidate_all = wx->revalidate_all; + cbdata.revalidate_subrules = wx->need_revalidate; + cbdata.revalidate_set = wx->revalidate_set; + tag_set_init(&wx->revalidate_set); + COVERAGE_INC(wx_revalidate); + classifier_for_each(&wx->cls, CLS_INC_EXACT, revalidate_cb, &cbdata); + wx->need_revalidate = false; + } +} + +static void +wx_run(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_run_one(wx); + } + xf_run(); +} + +static void +wx_wait_one(struct wx *wx) +{ + if 
(wx->need_revalidate || !tag_set_is_empty(&wx->revalidate_set)) { + poll_immediate_wake(); + } else if (wx->next_expiration != LLONG_MAX) { + poll_timer_wait_until(wx->next_expiration); + } +} + +static void +wx_wait(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_wait_one(wx); + } + xf_wait(); +} + +static int wx_flow_flush(struct wdp *); + +static int +wx_enumerate(const struct wdp_class *wdp_class, struct svec *all_wdps) +{ + struct svec names = SVEC_EMPTY_INITIALIZER; + int error = xf_enumerate_names(wdp_class->type, &names); + svec_move(all_wdps, &names); + return error; +} + +static int +wx_open(const struct wdp_class *wdp_class, const char *name, bool create, + struct wdp **wdpp) +{ + struct xfif *xfif; + int error; + + error = (create + ? xfif_create_and_open(name, wdp_class->type, &xfif) + : xfif_open(name, wdp_class->type, &xfif)); + if (!error) { + struct wx *wx; + + wx = xzalloc(sizeof *wx); + list_push_back(&all_wx, &wx->list_node); + wdp_init(&wx->wdp, wdp_class, name, 0, 0); + wx->xfif = xfif; + classifier_init(&wx->cls); + wx->netdev_monitor = netdev_monitor_create(); + port_array_init(&wx->ports); + shash_init(&wx->port_by_name); + wx->next_expiration = time_msec() + 1000; + tag_set_init(&wx->revalidate_set); + + wx_port_init(wx); + + wx->ofhooks = &default_ofhooks; + wx->aux = wx; + wx->ml = mac_learning_create(); + + *wdpp = &wx->wdp; + } + + return error; +} + +static void +wx_close(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + wx_flow_flush(wdp); + xfif_close(wx->xfif); + classifier_destroy(&wx->cls); + netdev_monitor_destroy(wx->netdev_monitor); + list_remove(&wx->list_node); + mac_learning_destroy(wx->ml); + free(wx); +} + +static int +wx_get_all_names(const struct wdp *wdp, struct svec *all_names) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_all_names(wx->xfif, all_names); +} + +static int +wx_destroy(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + return 
xfif_delete(wx->xfif); +} + +static int +wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp) +{ + struct wx *wx = wx_cast(wdp); + struct ofp_switch_features *osf; + struct ofpbuf *buf; + unsigned int port_no; + struct wdp_port *port; + + buf = ofpbuf_new(sizeof *osf); + osf = ofpbuf_put_zeros(buf, sizeof *osf); + osf->n_tables = 2; + osf->capabilities = htonl(OFPC_ARP_MATCH_IP); + osf->actions = htonl((1u << OFPAT_OUTPUT) | + (1u << OFPAT_SET_VLAN_VID) | + (1u << OFPAT_SET_VLAN_PCP) | + (1u << OFPAT_STRIP_VLAN) | + (1u << OFPAT_SET_DL_SRC) | + (1u << OFPAT_SET_DL_DST) | + (1u << OFPAT_SET_NW_SRC) | + (1u << OFPAT_SET_NW_DST) | + (1u << OFPAT_SET_NW_TOS) | + (1u << OFPAT_SET_TP_SRC) | + (1u << OFPAT_SET_TP_DST) | + (1u << OFPAT_ENQUEUE)); + + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp)); + } + + *featuresp = buf; + return 0; +} + +static void +count_subrules(struct cls_rule *cls_rule, void *n_subrules_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + int *n_subrules = n_subrules_; + + if (rule->super) { + (*n_subrules)++; + } +} + +static int +wx_get_stats(const struct wdp *wdp, struct wdp_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_stats xflow_stats; + int error; + + error = xfif_get_xf_stats(wx->xfif, &xflow_stats); + stats->max_ports = xflow_stats.max_ports; + return error; +} + +static int +wx_get_table_stats(const struct wdp *wdp, struct ofpbuf *stats) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_stats xflow_stats; + struct ofp_table_stats *exact, *wild; + int n_subrules; + + xfif_get_xf_stats(wx->xfif, &xflow_stats); + /* XXX should pass up errors, but there are no appropriate OpenFlow error + * codes. 
*/ + + n_subrules = 0; + classifier_for_each(&wx->cls, CLS_INC_EXACT, count_subrules, &n_subrules); + + exact = ofpbuf_put_zeros(stats, sizeof *exact); + exact->table_id = TABLEID_HASH; + strcpy(exact->name, "exact"); + exact->wildcards = htonl(0); + exact->max_entries = htonl(MIN(WX_MAX_EXACT, xflow_stats.max_capacity)); + exact->active_count = htonl(classifier_count_exact(&wx->cls) - n_subrules); + exact->lookup_count = htonll(xflow_stats.n_hit + xflow_stats.n_missed); + exact->matched_count = htonll(xflow_stats.n_hit); + + wild = ofpbuf_put_zeros(stats, sizeof *exact); + wild->table_id = TABLEID_CLASSIFIER; + strcpy(wild->name, "classifier"); + wild->wildcards = htonl(OVSFW_ALL); + wild->max_entries = htonl(WX_MAX_WILD); + wild->active_count = htonl(classifier_count_wild(&wx->cls)); + wild->lookup_count = htonll(0); /* XXX */ + wild->matched_count = htonll(0); /* XXX */ + + return 0; +} + +static int +wx_get_drop_frags(const struct wdp *wdp, bool *drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_set_drop_frags(struct wdp *wdp, bool drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_port_add(struct wdp *wdp, const char *devname, + bool internal, uint16_t *port_no) +{ + struct wx *wx = wx_cast(wdp); + uint16_t xflow_flags = internal ? 
XFLOW_PORT_INTERNAL : 0; + return xfif_port_add(wx->xfif, devname, xflow_flags, port_no); +} + +static int +wx_port_del(struct wdp *wdp, uint16_t port_no) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_port_del(wx->xfif, port_no); +} + +static int +wx_answer_port_query(const struct wdp_port *port, struct wdp_port *portp) +{ + if (port) { + wdp_port_copy(portp, port); + return 0; + } else { + return ENOENT; + } +} + +static int +wx_port_query_by_number(const struct wdp *wdp, uint16_t port_no, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + const struct wdp_port *port; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + return wx_answer_port_query(port, portp); +} + +static int +wx_port_query_by_name(const struct wdp *wdp, const char *devname, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + + return wx_answer_port_query(shash_find_data(&wx->port_by_name, devname), + portp); +} + +static int +wx_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *port; + uint32_t changes; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + if (!port) { + return ENOENT; + } + changes = config ^ port->opp.config; + + if (changes & OFPPC_PORT_DOWN) { + int error; + if (config & OFPPC_PORT_DOWN) { + error = netdev_turn_flags_off(port->netdev, NETDEV_UP, true); + } else { + error = netdev_turn_flags_on(port->netdev, NETDEV_UP, true); + } + if (!error) { + port->opp.config ^= OFPPC_PORT_DOWN; + } + } + +#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD) + if (changes & REVALIDATE_BITS) { + COVERAGE_INC(wx_costly_flags); + port->opp.config ^= changes & REVALIDATE_BITS; + wx->need_revalidate = true; + } +#undef REVALIDATE_BITS + + if (changes & OFPPC_NO_FLOOD) { + port->opp.config ^= OFPPC_NO_FLOOD; + wx_port_refresh_groups(wx); + } + + if (changes & OFPPC_NO_PACKET_IN) { + port->opp.config ^= OFPPC_NO_PACKET_IN; + } + + 
return 0; +} + +static int +wx_port_list(const struct wdp *wdp, struct wdp_port **portsp, size_t *n_portsp) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *ports, *port; + unsigned int port_no; + size_t n_ports, i; + + *n_portsp = n_ports = port_array_count(&wx->ports); + *portsp = ports = xmalloc(n_ports * sizeof *ports); + i = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + wdp_port_copy(&ports[i++], port); + } + assert(i == n_ports); + + return 0; +} + +static int +wx_port_poll(struct wdp *wdp, wdp_port_poll_cb_func *cb, void *aux) +{ + struct wx *wx = wx_cast(wdp); + char *devname; + int retval; + int error; + + retval = 0; + while ((error = xfif_port_poll(wx->xfif, &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname, cb, aux); + if (error && error != ENOBUFS) { + retval = error; + } + } + while ((error = netdev_monitor_poll(wx->netdev_monitor, + &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname, cb, aux); + if (error && error != ENOBUFS) { + retval = error; + } + } + return retval; +} + +static int +wx_port_poll_wait(const struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_port_poll_wait(wx->xfif); + netdev_monitor_poll_wait(wx->netdev_monitor); + return 0; +} + +static struct wdp_rule * +wx_flow_get(const struct wdp *wdp, const flow_t *flow, unsigned int include) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + int table_id; + + table_id = flow->wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH; + if (!(include & (1u << table_id))) { + return NULL; + } + + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, flow)); + return rule && !wx_rule_is_hidden(rule) ? 
&rule->wr : NULL; +} + +static struct wdp_rule * +wx_flow_match(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + + rule = wx_rule_cast(classifier_lookup(&wx->cls, flow)); + if (rule) { + if (wx_rule_is_hidden(rule)) { + rule = rule->super; + } + return &rule->wr; + } else { + return NULL; + } +} + +struct wx_for_each_thunk_aux { + wdp_flow_cb_func *client_callback; + void *client_aux; +}; + +static void +wx_for_each_thunk(struct cls_rule *cls_rule, void *aux_) +{ + struct wx_for_each_thunk_aux *aux = aux_; + struct wx_rule *rule = wx_rule_cast(cls_rule); + + if (!wx_rule_is_hidden(rule)) { + aux->client_callback(&rule->wr, aux->client_aux); + } +} + +static void +wx_flow_for_each_match(const struct wdp *wdp, const flow_t *target, + unsigned int include, + wdp_flow_cb_func *client_callback, void *client_aux) +{ + struct wx *wx = wx_cast(wdp); + struct wx_for_each_thunk_aux aux; + int cls_include; + + cls_include = 0; + if (include & (1u << TABLEID_HASH)) { + cls_include |= CLS_INC_EXACT; + } + if (include & (1u << TABLEID_CLASSIFIER)) { + cls_include |= CLS_INC_WILD; + } + + aux.client_callback = client_callback; + aux.client_aux = client_aux; + classifier_for_each_match(&wx->cls, target, cls_include, + wx_for_each_thunk, &aux); +} + +/* Obtains statistic counters for 'rule' within 'wx' and stores them into + * '*stats'. If 'rule' is a wildcarded rule, the returned statistic include + * statistics for all of 'rule''s subrules. */ +static void +query_stats(struct wx *wx, struct wx_rule *rule, struct wdp_flow_stats *stats) +{ + struct wx_rule *subrule; + struct xflow_flow *xflow_flows; + size_t n_xflow_flows; + + /* Start from historical data for 'rule' itself that are no longer tracked + * by the datapath. This counts, for example, subrules that have + * expired. 
*/ + stats->n_packets = rule->packet_count; + stats->n_bytes = rule->byte_count; + stats->inserted = rule->wr.created; + stats->used = LLONG_MIN; + stats->tcp_flags = 0; + stats->ip_tos = 0; + + /* Prepare to ask the datapath for statistics on 'rule', or if it is + * wildcarded then on all of its subrules. + * + * Also, add any statistics that are not tracked by the datapath for each + * subrule. This includes, for example, statistics for packets that were + * executed "by hand" by ofproto via xfif_execute() but must be accounted + * to a flow. */ + n_xflow_flows = rule->wr.cr.flow.wildcards ? list_size(&rule->list) : 1; + xflow_flows = xzalloc(n_xflow_flows * sizeof *xflow_flows); + if (rule->wr.cr.flow.wildcards) { + size_t i = 0; + LIST_FOR_EACH (subrule, struct wx_rule, list, &rule->list) { + xflow_key_from_flow(&xflow_flows[i++].key, &subrule->wr.cr.flow); + stats->n_packets += subrule->packet_count; + stats->n_bytes += subrule->byte_count; + } + } else { + xflow_key_from_flow(&xflow_flows[0].key, &rule->wr.cr.flow); + } + + /* Fetch up-to-date statistics from the datapath and add them in. 
*/ + if (!xfif_flow_get_multiple(wx->xfif, xflow_flows, n_xflow_flows)) { + size_t i; + for (i = 0; i < n_xflow_flows; i++) { + struct xflow_flow *xflow_flow = &xflow_flows[i]; + long long int used; + + stats->n_packets += xflow_flow->stats.n_packets; + stats->n_bytes += xflow_flow->stats.n_bytes; + used = xflow_flow_stats_to_msec(&xflow_flow->stats); + if (used > stats->used) { + stats->used = used; + if (xflow_flow->key.dl_type == htons(ETH_TYPE_IP) + && xflow_flow->key.nw_proto == IP_TYPE_TCP) { + stats->ip_tos = xflow_flow->stats.ip_tos; + } + } + stats->tcp_flags |= xflow_flow->stats.tcp_flags; + } + } + free(xflow_flows); +} + +static int +wx_flow_get_stats(const struct wdp *wdp, + const struct wdp_rule *wdp_rule, + struct wdp_flow_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + query_stats(wx, rule, stats); + return 0; +} + +static bool +wx_flow_overlaps(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + + /* XXX overlap with a subrule? */ + return classifier_rule_overlaps(&wx->cls, flow); +} + +static int +wx_flow_put(struct wdp *wdp, const struct wdp_flow_put *put, + struct wdp_flow_stats *old_stats, struct wdp_rule **rulep) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + uint8_t ofp_table_id; + + ofp_table_id = put->flow->wildcards ? TABLEID_CLASSIFIER : TABLEID_HASH; + if (put->ofp_table_id != 0xff && put->ofp_table_id != ofp_table_id) { + return EINVAL; + } + + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, put->flow)); + if (rule && wx_rule_is_hidden(rule)) { + rule = NULL; + } + + if (rule) { + if (!(put->flags & WDP_PUT_MODIFY)) { + return EEXIST; + } + } else { + if (!(put->flags & WDP_PUT_CREATE)) { + return EINVAL; + } + if ((put->flow->wildcards + ? 
classifier_count_wild(&wx->cls) >= WX_MAX_WILD + : classifier_count_exact(&wx->cls) >= WX_MAX_EXACT)) { + /* XXX subrules should not count against exact-match limit */ + return ENOBUFS; + } + } + + rule = wx_rule_create(NULL, put->actions, put->n_actions, + put->idle_timeout, put->hard_timeout); + cls_rule_from_flow(put->flow, &rule->wr.cr); + rule->wr.ofp_table_id = ofp_table_id; + wx_rule_insert(wx, rule, NULL, 0); + + if (old_stats) { + /* XXX */ + memset(old_stats, 0, sizeof *old_stats); + } + if (rulep) { + *rulep = &rule->wr; + } + + return 0; +} + +static int +wx_flow_delete(struct wdp *wdp, struct wdp_rule *wdp_rule, + struct wdp_flow_stats *final_stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + wx_rule_remove(wx, rule); + if (final_stats) { + memset(final_stats, 0, sizeof *final_stats); /* XXX */ + } + return 0; +} + +static void +wx_flush_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + struct wx *wx = wx_; + + /* Mark the flow as not installed, even though it might really be + * installed, so that wx_rule_remove() doesn't bother trying to uninstall + * it. There is no point in uninstalling it individually since we are + * about to blow away all the flows with xfif_flow_flush(). 
*/ + rule->installed = false; + + wx_rule_remove(wx, rule); +} + +static int +wx_flow_flush(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + COVERAGE_INC(wx_flow_flush); + classifier_for_each(&wx->cls, CLS_INC_ALL, wx_flush_rule, wx); + xfif_flow_flush(wx->xfif); + return 0; +} + +static int +wx_execute(struct wdp *wdp, uint16_t in_port, + const union ofp_action actions[], int n_actions, + const struct ofpbuf *packet) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_actions xflow_actions; + flow_t flow; + int error; + + flow_extract((struct ofpbuf *) packet, 0, in_port, &flow); + error = wx_xlate_actions(wx, actions, n_actions, &flow, packet, + NULL, &xflow_actions, NULL); + if (error) { + return error; + } + xfif_execute(wx->xfif, ofp_port_to_xflow_port(in_port), + xflow_actions.actions, xflow_actions.n_actions, packet); + return 0; +} + +static int +wx_flow_inject(struct wdp *wdp, struct wdp_rule *wdp_rule, + uint16_t in_port, const struct ofpbuf *packet) +{ + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + int error; + + error = wx_execute(wdp, in_port, rule->wr.actions, rule->wr.n_actions, + packet); + if (!error) { + rule->packet_count++; + rule->byte_count += packet->size; + rule->used = time_msec(); + } + return error; +} + +static int +wx_recv_get_mask(const struct wdp *wdp, int *listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + int error; + + error = xfif_recv_get_mask(wx->xfif, &xflow_listen_mask); + if (!error) { + *listen_mask = 0; + if (xflow_listen_mask & XFLOWL_MISS) { + *listen_mask |= 1 << WDP_CHAN_MISS; + } + if (xflow_listen_mask & XFLOWL_ACTION) { + *listen_mask |= 1 << WDP_CHAN_ACTION; + } + if (xflow_listen_mask & XFLOWL_SFLOW) { + *listen_mask |= 1 << WDP_CHAN_SFLOW; + } + } + return error; +} + +static int +wx_recv_set_mask(struct wdp *wdp, int listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + + xflow_listen_mask = 0; + if (listen_mask & (1 << WDP_CHAN_MISS)) { + 
xflow_listen_mask |= XFLOWL_MISS; + } + if (listen_mask & (1 << WDP_CHAN_ACTION)) { + xflow_listen_mask |= XFLOWL_ACTION; + } + if (listen_mask & (1 << WDP_CHAN_SFLOW)) { + xflow_listen_mask |= XFLOWL_SFLOW; + } + + return xfif_recv_set_mask(wx->xfif, xflow_listen_mask); +} + +static int +wx_get_sflow_probability(const struct wdp *wdp, uint32_t *probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_sflow_probability(wx->xfif, probability); +} + +static int +wx_set_sflow_probability(struct wdp *wdp, uint32_t probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_sflow_probability(wx->xfif, probability); +} + +static int +wx_translate_xflow_msg(struct xflow_msg *msg, struct ofpbuf *payload, + struct wdp_packet *packet) +{ + packet->in_port = xflow_port_to_ofp_port(msg->port); + packet->send_len = 0; + packet->tun_id = 0; + + switch (msg->type) { + case _XFLOWL_MISS_NR: + packet->channel = WDP_CHAN_MISS; + packet->payload = payload; + packet->tun_id = msg->arg; + return 0; + + case _XFLOWL_ACTION_NR: + packet->channel = WDP_CHAN_ACTION; + packet->payload = payload; + packet->send_len = msg->arg; + return 0; + + case _XFLOWL_SFLOW_NR: + /* XXX */ + ofpbuf_delete(payload); + return ENOSYS; + + default: + VLOG_WARN_RL(&rl, "received XFLOW message of unexpected type %"PRIu32, + msg->type); + ofpbuf_delete(payload); + return ENOSYS; + } +} + +static const uint8_t * +get_local_mac(const struct wx *wx) +{ + const struct wdp_port *port = port_array_get(&wx->ports, XFLOWP_LOCAL); + return port ? port->opp.hw_addr : NULL; +} + +/* Returns true if 'packet' is a DHCP reply to the local port. Such a reply + * should be sent to the local port regardless of the flow table. + * + * We only need this, strictly speaking, when in-band control is turned on. 
*/ +static bool +wx_is_local_dhcp_reply(const struct wx *wx, + const flow_t *flow, const struct ofpbuf *packet) +{ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT) + && packet->l7) + { + const uint8_t *local_mac = get_local_mac(wx); + struct dhcp_header *dhcp = ofpbuf_at( + packet, (char *)packet->l7 - (char *)packet->data, sizeof *dhcp); + return dhcp && local_mac && eth_addr_equals(dhcp->chaddr, local_mac); + } + + return false; +} + +static bool +wx_explode_rule(struct wx *wx, struct xflow_msg *msg, struct ofpbuf *payload) +{ + struct wx_rule *rule; + flow_t flow; + + flow_extract(payload, 0, xflow_port_to_ofp_port(msg->port), &flow); + + if (wx_is_local_dhcp_reply(wx, &flow, payload)) { + union xflow_action action; + + memset(&action, 0, sizeof(action)); + action.output.type = XFLOWAT_OUTPUT; + action.output.port = XFLOWP_LOCAL; + xfif_execute(wx->xfif, msg->port, &action, 1, payload); + } + + rule = wx_rule_lookup_valid(wx, &flow); + if (!rule) { + return false; + } + + if (rule->wr.cr.flow.wildcards) { + rule = wx_rule_create_subrule(wx, rule, &flow); + wx_rule_make_actions(wx, rule, payload); + } else { + if (!rule->may_install) { + /* The rule is not installable, that is, we need to process every + * packet, so process the current packet and set its actions into + * 'subrule'. */ + wx_rule_make_actions(wx, rule, payload); + } else { + /* XXX revalidate rule if it needs it */ + } + } + + wx_rule_execute(wx, rule, payload, &flow); + wx_rule_reinstall(wx, rule); + + return true; +} + +static int +wx_recv(struct wdp *wdp, struct wdp_packet *packet) +{ + struct wx *wx = wx_cast(wdp); + int i; + + /* XXX need to avoid 50*50 potential cost for caller. 
*/ + for (i = 0; i < 50; i++) { + struct xflow_msg *msg; + struct ofpbuf *buf; + int error; + + error = xfif_recv(wx->xfif, &buf); + if (error) { + return error; + } + + msg = ofpbuf_pull(buf, sizeof *msg); + if (msg->type != _XFLOWL_MISS_NR || !wx_explode_rule(wx, msg, buf)) { + return wx_translate_xflow_msg(msg, buf, packet); + } + ofpbuf_delete(buf); + } + return EAGAIN; +} + +static void +wx_recv_purge_queue__(struct wx *wx, int max, int xflow_listen_mask, + int *errorp) +{ + int error; + + error = xfif_recv_set_mask(wx->xfif, xflow_listen_mask); + if (!error) { + struct ofpbuf *buf; + + while (max > 0 && (error = xfif_recv(wx->xfif, &buf)) == 0) { + ofpbuf_delete(buf); + max--; + } + } + if (error && error != EAGAIN) { + *errorp = error; + } +} + +static int +wx_recv_purge(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_stats xflow_stats; + int xflow_listen_mask; + int retval, error; + + xfif_get_xf_stats(wx->xfif, &xflow_stats); + + error = xfif_recv_get_mask(wx->xfif, &xflow_listen_mask); + if (error || !(xflow_listen_mask & XFLOWL_ALL)) { + return error; + } + + if (xflow_listen_mask & XFLOWL_MISS) { + wx_recv_purge_queue__(wx, xflow_stats.max_miss_queue, XFLOWL_MISS, + &error); + } + if (xflow_listen_mask & XFLOWL_ACTION) { + wx_recv_purge_queue__(wx, xflow_stats.max_action_queue, XFLOWL_ACTION, + &error); + } + if (xflow_listen_mask & XFLOWL_SFLOW) { + wx_recv_purge_queue__(wx, xflow_stats.max_sflow_queue, XFLOWL_SFLOW, + &error); + } + + retval = xfif_recv_set_mask(wx->xfif, xflow_listen_mask); + return retval ? 
retval : error; +} + + +static void +wx_recv_wait(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_recv_wait(wx->xfif); +} + +static int +wx_set_ofhooks(struct wdp *wdp, const struct ofhooks *ofhooks, void *aux) +{ + struct wx *wx = wx_cast(wdp); + + if (wx->ofhooks == &default_ofhooks) { + mac_learning_destroy(wx->ml); + wx->ml = NULL; + } + + wx->ofhooks = ofhooks; + wx->aux = aux; + return 0; +} + +static void +wx_revalidate(struct wdp *wdp, tag_type tag) +{ + struct wx *wx = wx_cast(wdp); + + tag_set_add(&wx->revalidate_set, tag); +} + +static void +wx_revalidate_all(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + wx->revalidate_all = true; +} + +static void wx_port_update(struct wx *, const char *devname, + wdp_port_poll_cb_func *cb, void *aux); +static void wx_port_reinit(struct wx *, wdp_port_poll_cb_func *cb, void *aux); + +static void +wx_port_process_change(struct wx *wx, int error, char *devname, + wdp_port_poll_cb_func *cb, void *aux) +{ + if (error == ENOBUFS) { + wx_port_reinit(wx, cb, aux); + } else if (!error) { + wx_port_update(wx, devname, cb, aux); + free(devname); + } +} + +static size_t +wx_port_refresh_group(struct wx *wx, unsigned int group) +{ + uint16_t *ports; + size_t n_ports; + struct wdp_port *port; + unsigned int port_no; + + assert(group == WX_GROUP_ALL || group == WX_GROUP_FLOOD); + + ports = xmalloc(port_array_count(&wx->ports) * sizeof *ports); + n_ports = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + if (group == WX_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { + ports[n_ports++] = port_no; + } + } + xfif_port_group_set(wx->xfif, group, ports, n_ports); + free(ports); + + return n_ports; +} + +static void +wx_port_refresh_groups(struct wx *wx) +{ + wx_port_refresh_group(wx, WX_GROUP_FLOOD); + wx_port_refresh_group(wx, WX_GROUP_ALL); +} + +static void +wx_port_reinit(struct wx *wx, wdp_port_poll_cb_func *cb, void *aux) +{ + struct svec devnames; + struct wdp_port *wdp_port; + unsigned int 
port_no; + struct xflow_port *xflow_ports; + size_t n_xflow_ports; + size_t i; + + svec_init(&devnames); + PORT_ARRAY_FOR_EACH (wdp_port, &wx->ports, port_no) { + svec_add (&devnames, (char *) wdp_port->opp.name); + } + xfif_port_list(wx->xfif, &xflow_ports, &n_xflow_ports); + for (i = 0; i < n_xflow_ports; i++) { + svec_add(&devnames, xflow_ports[i].devname); + } + free(xflow_ports); + + svec_sort_unique(&devnames); + for (i = 0; i < devnames.n; i++) { + wx_port_update(wx, devnames.names[i], cb, aux); + } + svec_destroy(&devnames); + + wx_port_refresh_groups(wx); +} + +static struct wdp_port * +make_wdp_port(const struct xflow_port *xflow_port) +{ + struct netdev_options netdev_options; + enum netdev_flags flags; + struct wdp_port *wdp_port; + struct netdev *netdev; + bool carrier; + int error; + + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = xflow_port->devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; + + error = netdev_open(&netdev_options, &netdev); + if (error) { + VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s " + "cannot be opened (%s)", + xflow_port->devname, xflow_port->port, + xflow_port->devname, strerror(error)); + return NULL; + } + + wdp_port = xmalloc(sizeof *wdp_port); + wdp_port->netdev = netdev; + wdp_port->opp.port_no = xflow_port_to_ofp_port(xflow_port->port); + netdev_get_etheraddr(netdev, wdp_port->opp.hw_addr); + strncpy((char *) wdp_port->opp.name, xflow_port->devname, + sizeof wdp_port->opp.name); + wdp_port->opp.name[sizeof wdp_port->opp.name - 1] = '\0'; + + netdev_get_flags(netdev, &flags); + wdp_port->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN; + + netdev_get_carrier(netdev, &carrier); + wdp_port->opp.state = carrier ? 
0 : OFPPS_LINK_DOWN; + + netdev_get_features(netdev, + &wdp_port->opp.curr, &wdp_port->opp.advertised, + &wdp_port->opp.supported, &wdp_port->opp.peer); + + wdp_port->devname = xstrdup(xflow_port->devname); + wdp_port->internal = (xflow_port->flags & XFLOW_PORT_INTERNAL) != 0; + return wdp_port; +} + +static bool +wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port) +{ + if (port_array_get(&wx->ports, xflow_port->port)) { + VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath", + xflow_port->port); + return true; + } else if (shash_find(&wx->port_by_name, xflow_port->devname)) { + VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath", + xflow_port->devname); + return true; + } else { + return false; + } +} + +static int +wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_) +{ + const struct ofp_phy_port *a = &a_->opp; + const struct ofp_phy_port *b = &b_->opp; + + BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. 
*/ + return (a->port_no == b->port_no + && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) + && !strcmp((char *) a->name, (char *) b->name) + && a->state == b->state + && a->config == b->config + && a->curr == b->curr + && a->advertised == b->advertised + && a->supported == b->supported + && a->peer == b->peer); +} + +static void +wx_port_install(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + const char *netdev_name = (const char *) wdp_port->opp.name; + + netdev_monitor_add(wx->netdev_monitor, wdp_port->netdev); + port_array_set(&wx->ports, xflow_port, wdp_port); + shash_add(&wx->port_by_name, netdev_name, wdp_port); +} + +static void +wx_port_remove(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + + netdev_monitor_remove(wx->netdev_monitor, wdp_port->netdev); + port_array_delete(&wx->ports, xflow_port); + shash_delete(&wx->port_by_name, + shash_find(&wx->port_by_name, (char *) wdp_port->opp.name)); +} + +static void +wx_port_free(struct wdp_port *wdp_port) +{ + if (wdp_port) { + netdev_close(wdp_port->netdev); + free(wdp_port); + } +} + +static void +wx_port_update(struct wx *wx, const char *devname, + wdp_port_poll_cb_func *cb, void *aux) +{ + struct xflow_port xflow_port; + struct wdp_port *old_wdp_port; + struct wdp_port *new_wdp_port; + int error; + + COVERAGE_INC(wx_update_port); + + /* Query the datapath for port information. */ + error = xfif_port_query_by_name(wx->xfif, devname, &xflow_port); + + /* Find the old wdp_port. */ + old_wdp_port = shash_find_data(&wx->port_by_name, devname); + if (!error) { + if (!old_wdp_port) { + /* There's no port named 'devname' but there might be a port with + * the same port number. This could happen if a port is deleted + * and then a new one added in its place very quickly, or if a port + * is renamed. 
In the former case we want to send an OFPPR_DELETE + * and an OFPPR_ADD, and in the latter case we want to send a + * single OFPPR_MODIFY. We can distinguish the cases by comparing + * the old port's ifindex against the new port, or perhaps less + * reliably but more portably by comparing the old port's MAC + * against the new port's MAC. However, this code isn't that smart + * and always sends an OFPPR_MODIFY (XXX). */ + old_wdp_port = port_array_get(&wx->ports, xflow_port.port); + } + } else if (error != ENOENT && error != ENODEV) { + VLOG_WARN_RL(&rl, "xfif_port_query_by_name returned unexpected error " + "%s", strerror(error)); + return; + } + + /* Create a new wdp_port. */ + new_wdp_port = !error ? make_wdp_port(&xflow_port) : NULL; + + /* Eliminate a few pathological cases. */ + if (!old_wdp_port && !new_wdp_port) { + return; + } else if (old_wdp_port && new_wdp_port) { + /* Most of the 'config' bits are OpenFlow soft state, but + * OFPPC_PORT_DOWN is maintained by the kernel. So transfer the + * OpenFlow bits from old_wdp_port. (make_wdp_port() only sets + * OFPPC_PORT_DOWN and leaves the other bits 0.) */ + new_wdp_port->opp.config |= old_wdp_port->opp.config & ~OFPPC_PORT_DOWN; + + if (wdp_port_equal(old_wdp_port, new_wdp_port)) { + /* False alarm--no change. */ + wx_port_free(new_wdp_port); + return; + } + } + + /* Now deal with the normal cases. */ + if (old_wdp_port) { + wx_port_remove(wx, old_wdp_port); + } + if (new_wdp_port) { + wx_port_install(wx, new_wdp_port); + } + + /* Call back. */ + if (!old_wdp_port) { + (*cb)(&new_wdp_port->opp, OFPPR_ADD, aux); + } else if (!new_wdp_port) { + (*cb)(&old_wdp_port->opp, OFPPR_DELETE, aux); + } else { + (*cb)(&new_wdp_port->opp, OFPPR_MODIFY, aux); + } + + /* Update port groups. */ + wx_port_refresh_groups(wx); + + /* Clean up. 
*/ + wx_port_free(old_wdp_port); +} + +static int +wx_port_init(struct wx *wx) +{ + struct xflow_port *ports; + size_t n_ports; + size_t i; + int error; + + error = xfif_port_list(wx->xfif, &ports, &n_ports); + if (error) { + return error; + } + + for (i = 0; i < n_ports; i++) { + const struct xflow_port *xflow_port = &ports[i]; + if (!wx_port_conflicts(wx, xflow_port)) { + struct wdp_port *wdp_port = make_wdp_port(xflow_port); + if (wdp_port) { + wx_port_install(wx, wdp_port); + } + } + } + free(ports); + wx_port_refresh_groups(wx); + return 0; +} + +void +wdp_xflow_register(void) +{ + static const struct wdp_class wdp_xflow_class = { + NULL, /* name */ + wx_run, + wx_wait, + wx_enumerate, + wx_open, + wx_close, + wx_get_all_names, + wx_destroy, + wx_get_features, + wx_get_stats, + wx_get_table_stats, + wx_get_drop_frags, + wx_set_drop_frags, + wx_port_add, + wx_port_del, + wx_port_query_by_number, + wx_port_query_by_name, + wx_port_list, + wx_port_set_config, + wx_port_poll, + wx_port_poll_wait, + wx_flow_get, + wx_flow_match, + wx_flow_for_each_match, + wx_flow_get_stats, + wx_flow_overlaps, + wx_flow_put, + wx_flow_delete, + wx_flow_flush, + wx_flow_inject, + wx_execute, + wx_recv_get_mask, + wx_recv_set_mask, + wx_get_sflow_probability, + wx_set_sflow_probability, + wx_recv, + wx_recv_purge, + wx_recv_wait, + wx_set_ofhooks, + wx_revalidate, + wx_revalidate_all, + }; + + static bool inited = false; + + struct svec types; + const char *type; + bool registered; + int i; + + if (inited) { + return; + } + inited = true; + + svec_init(&types); + xf_enumerate_types(&types); + + registered = false; + SVEC_FOR_EACH (i, type, &types) { + struct wdp_class *class; + + class = xmalloc(sizeof *class); + *class = wdp_xflow_class; + class->type = xstrdup(type); + if (registered) { + class->run = NULL; + class->wait = NULL; + } + if (!wdp_register_provider(class)) { + registered = true; + } + } + + svec_destroy(&types); +} + +static bool +default_normal_ofhook_cb(const flow_t 
*flow, const struct ofpbuf *packet, + struct xflow_actions *actions, tag_type *tags, + uint16_t *nf_output_iface, void *wx_) +{ + struct wx *wx = wx_; + int out_port; + + /* Drop frames for reserved multicast addresses. */ + if (eth_addr_is_reserved(flow->dl_dst)) { + return true; + } + + /* Learn source MAC (but don't try to learn from revalidation). */ + if (packet != NULL) { + tag_type rev_tag = mac_learning_learn(wx->ml, flow->dl_src, + 0, flow->in_port, + GRAT_ARP_LOCK_NONE); + if (rev_tag) { + /* The log messages here could actually be useful in debugging, + * so keep the rate limit relatively high. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + VLOG_DBG_RL(&rl, "learned that "ETH_ADDR_FMT" is on port %"PRIu16, + ETH_ADDR_ARGS(flow->dl_src), flow->in_port); + tag_set_add(&wx->revalidate_set, rev_tag); + } + } + + /* Determine output port. */ + out_port = mac_learning_lookup_tag(wx->ml, flow->dl_dst, 0, tags, + NULL); + if (out_port < 0) { + add_output_group_action(actions, WX_GROUP_FLOOD, nf_output_iface); + } else if (out_port != flow->in_port) { + xflow_actions_add(actions, XFLOWAT_OUTPUT)->output.port = out_port; + *nf_output_iface = out_port; + } else { + /* Drop. */ + } + + return true; +} + +static const struct ofhooks default_ofhooks = { + NULL, + default_normal_ofhook_cb, + NULL, + NULL +}; diff --cc ofproto/wdp.c index f48f42dbf,000000000..a5f3d3e17 mode 100644,000000..100644 --- a/ofproto/wdp.c +++ b/ofproto/wdp.c @@@ -1,1123 -1,0 +1,1123 @@@ +/* + * Copyright (c) 2008, 2009, 2010 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "wdp-provider.h" + +#include +#include +#include +#include +#include +#include + +#include "coverage.h" +#include "dynamic-string.h" +#include "flow.h" +#include "netdev.h" +#include "netlink.h" +#include "ofp-print.h" +#include "ofpbuf.h" +#include "packets.h" +#include "poll-loop.h" +#include "shash.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "valgrind.h" ++#include "vlog.h" +#include "wdp-xflow.h" + - #include "vlog.h" - #define THIS_MODULE VLM_wdp ++VLOG_DEFINE_THIS_MODULE(wdp) + +/* wdp_rule */ + +/* Initializes a new 'struct wdp_rule', copying in the 'n_actions' elements of + * 'actions'. + * + * The caller is responsible for initializing 'rule->cr'. The caller must also + * fill in 'rule->ofp_table_id', if the wdp has more than one table. */ +void +wdp_rule_init(struct wdp_rule *rule, const union ofp_action *actions, + size_t n_actions) +{ + rule->actions = xmemdup(actions, n_actions * sizeof *actions); + rule->n_actions = n_actions; + rule->created = time_msec(); + rule->idle_timeout = 0; + rule->hard_timeout = 0; + rule->ofp_table_id = 0; + rule->client_data = NULL; +} + +/* Frees the data in 'rule'. 
*/ +void +wdp_rule_uninit(struct wdp_rule *rule) +{ + free(rule->actions); +} + +/* wdp */ + +static const struct wdp_class *base_wdp_classes[] = { + /* XXX none yet */ +}; + +struct registered_wdp_class { + const struct wdp_class *wdp_class; + int refcount; +}; + +static struct shash wdp_classes = SHASH_INITIALIZER(&wdp_classes); + +/* Rate limit for individual messages going to or from the datapath, output at + * DBG level. This is very high because, if these are enabled, it is because + * we really need to see them. */ +static struct vlog_rate_limit wdpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); + +/* Not really much point in logging many wdp errors. */ +static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); + +static void log_operation(const struct wdp *, const char *operation, + int error); + +static void +wdp_initialize(void) +{ + static int status = -1; + + if (status < 0) { + int i; + + status = 0; + for (i = 0; i < ARRAY_SIZE(base_wdp_classes); i++) { + wdp_register_provider(base_wdp_classes[i]); + } + wdp_xflow_register(); + } +} + +/* Performs periodic work needed by all the various kinds of wdps. + * + * If your program opens any wdps, it must call both this function and + * netdev_run() within its main poll loop. */ +void +wdp_run(void) +{ + struct shash_node *node; + SHASH_FOR_EACH (node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + if (registered_class->wdp_class->run) { + registered_class->wdp_class->run(); + } + } +} + +/* Arranges for poll_block() to wake up when wdp_run() needs to be called. + * + * If your program opens any wdps, it must call both this function and + * netdev_wait() within its main poll loop. 
*/ +void +wdp_wait(void) +{ + struct shash_node *node; + SHASH_FOR_EACH(node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + if (registered_class->wdp_class->wait) { + registered_class->wdp_class->wait(); + } + } +} + +/* Registers a new datapath provider. After successful registration, new + * datapaths of that type can be opened using wdp_open(). */ +int +wdp_register_provider(const struct wdp_class *new_class) +{ + struct registered_wdp_class *registered_class; + + if (shash_find(&wdp_classes, new_class->type)) { + VLOG_WARN("attempted to register duplicate datapath provider: %s", + new_class->type); + return EEXIST; + } + + registered_class = xmalloc(sizeof *registered_class); + registered_class->wdp_class = new_class; + registered_class->refcount = 0; + + shash_add(&wdp_classes, new_class->type, registered_class); + + return 0; +} + +/* Unregisters a datapath provider. 'type' must have been previously + * registered and not currently be in use by any wdps. After unregistration + * new datapaths of that type cannot be opened using wdp_open(). */ +int +wdp_unregister_provider(const char *type) +{ + struct shash_node *node; + struct registered_wdp_class *registered_class; + + node = shash_find(&wdp_classes, type); + if (!node) { + VLOG_WARN("attempted to unregister a datapath provider that is not " + "registered: %s", type); + return EAFNOSUPPORT; + } + + registered_class = node->data; + if (registered_class->refcount) { + VLOG_WARN("attempted to unregister in use datapath provider: %s", + type); + return EBUSY; + } + + shash_delete(&wdp_classes, node); + free(registered_class); + + return 0; +} + +/* Clears 'types' and enumerates the types of all currently registered wdp + * providers into it. The caller must first initialize the svec. 
*/ +void +wdp_enumerate_types(struct svec *types) +{ + struct shash_node *node; + + wdp_initialize(); + svec_clear(types); + + SHASH_FOR_EACH (node, &wdp_classes) { + const struct registered_wdp_class *registered_class = node->data; + svec_add(types, registered_class->wdp_class->type); + } +} + +/* Clears 'names' and enumerates the names of all known created datapaths + * with the given 'type'. The caller must first initialize the svec. Returns 0 + * if successful, otherwise a positive errno value. + * + * Some kinds of datapaths might not be practically enumerable. This is not + * considered an error. */ +int +wdp_enumerate_names(const char *type, struct svec *names) +{ + const struct registered_wdp_class *registered_class; + const struct wdp_class *wdp_class; + int error; + + wdp_initialize(); + svec_clear(names); + + registered_class = shash_find_data(&wdp_classes, type); + if (!registered_class) { + VLOG_WARN("could not enumerate unknown type: %s", type); + return EAFNOSUPPORT; + } + + wdp_class = registered_class->wdp_class; + error = (wdp_class->enumerate + ? wdp_class->enumerate(wdp_class, names) + : 0); + + if (error) { + VLOG_WARN("failed to enumerate %s datapaths: %s", wdp_class->type, + strerror(error)); + } + + return error; +} + +/* Parses 'datapath_name', which is of the form type@name, into its + * component pieces. 'name' and 'type' must be freed by the caller. 
*/ +void +wdp_parse_name(const char *datapath_name_, char **name, char **type) +{ + char *datapath_name = xstrdup(datapath_name_); + char *separator; + + separator = strchr(datapath_name, '@'); + if (separator) { + *separator = '\0'; + *type = datapath_name; + *name = xstrdup(separator + 1); + } else { + *name = datapath_name; + *type = NULL; + } +} + +static int +do_open(const char *name, const char *type, bool create, struct wdp **wdpp) +{ + struct wdp *wdp = NULL; + int error; + struct registered_wdp_class *registered_class; + + wdp_initialize(); + + if (!type || *type == '\0') { + type = "system"; + } + + registered_class = shash_find_data(&wdp_classes, type); + if (!registered_class) { + VLOG_WARN("could not create datapath %s of unknown type %s", name, + type); + error = EAFNOSUPPORT; + goto exit; + } + + error = registered_class->wdp_class->open(registered_class->wdp_class, + name, create, &wdp); + if (!error) { + registered_class->refcount++; + } + +exit: + *wdpp = error ? NULL : wdp; + return error; +} + +/* Tries to open an existing datapath named 'name' and type 'type'. Will fail + * if no datapath with 'name' and 'type' exists. 'type' may be either NULL or + * the empty string to specify the default system type. Returns 0 if + * successful, otherwise a positive errno value. On success stores a pointer + * to the datapath in '*wdpp', otherwise a null pointer. */ +int +wdp_open(const char *name, const char *type, struct wdp **wdpp) +{ + return do_open(name, type, false, wdpp); +} + +/* Tries to create and open a new datapath with the given 'name' and 'type'. + * 'type' may be either NULL or the empty string to specify the default system + * type. Will fail if a datapath with 'name' and 'type' already exists. + * Returns 0 if successful, otherwise a positive errno value. On success + * stores a pointer to the datapath in '*wdpp', otherwise a null pointer. 
*/ +int +wdp_create(const char *name, const char *type, struct wdp **wdpp) +{ + return do_open(name, type, true, wdpp); +} + +/* Tries to open a datapath with the given 'name' and 'type', creating it if it + * does not exist. 'type' may be either NULL or the empty string to specify + * the default system type. Returns 0 if successful, otherwise a positive + * errno value. On success stores a pointer to the datapath in '*wdpp', + * otherwise a null pointer. */ +int +wdp_create_and_open(const char *name, const char *type, struct wdp **wdpp) +{ + int error; + + error = wdp_create(name, type, wdpp); + if (error == EEXIST || error == EBUSY) { + error = wdp_open(name, type, wdpp); + if (error) { + VLOG_WARN("datapath %s already exists but cannot be opened: %s", + name, strerror(error)); + } + } else if (error) { + VLOG_WARN("failed to create datapath %s: %s", name, strerror(error)); + } + return error; +} + +/* Closes and frees the connection to 'wdp'. Does not destroy the wdp + * itself; call wdp_delete() first, instead, if that is desirable. */ +void +wdp_close(struct wdp *wdp) +{ + if (wdp) { + struct registered_wdp_class *registered_class; + + registered_class = shash_find_data(&wdp_classes, + wdp->wdp_class->type); + assert(registered_class); + assert(registered_class->refcount); + + registered_class->refcount--; + wdp_uninit(wdp, true); + } +} + +/* Returns the name of datapath 'wdp' prefixed with the type + * (for use in log messages). */ +const char * +wdp_name(const struct wdp *wdp) +{ + return wdp->full_name; +} + +/* Returns the name of datapath 'wdp' without the type + * (for use in device names). */ +const char * +wdp_base_name(const struct wdp *wdp) +{ + return wdp->base_name; +} + +/* Enumerates all names that may be used to open 'wdp' into 'all_names'. The + * Linux datapath, for example, supports opening a datapath both by number, + * e.g. "wdp0", and by the name of the datapath's local port. For some + * datapaths, this might be an infinite set (e.g. 
in a file name, slashes may + * be duplicated any number of times), in which case only the names most likely + * to be used will be enumerated. + * + * The caller must already have initialized 'all_names'. Any existing names in + * 'all_names' will not be disturbed. */ +int +wdp_get_all_names(const struct wdp *wdp, struct svec *all_names) +{ + if (wdp->wdp_class->get_all_names) { + int error = wdp->wdp_class->get_all_names(wdp, all_names); + if (error) { + VLOG_WARN_RL(&error_rl, + "failed to retrieve names for datpath %s: %s", + wdp_name(wdp), strerror(error)); + } + return error; + } else { + svec_add(all_names, wdp_base_name(wdp)); + return 0; + } +} + +/* Destroys the datapath that 'wdp' is connected to, first removing all of + * its ports. After calling this function, it does not make sense to pass + * 'wdp' to any functions other than wdp_name() or wdp_close(). */ +int +wdp_delete(struct wdp *wdp) +{ + int error; + + COVERAGE_INC(wdp_destroy); + + error = wdp->wdp_class->destroy(wdp); + log_operation(wdp, "delete", error); + return error; +} + +/* Obtains the set of features supported by 'wdp'. + * + * If successful, returns 0 and stores in '*featuresp' a newly allocated + * "struct ofp_switch_features" that describes the features and ports supported + * by 'wdp'. The caller is responsible for initializing the header, + * datapath_id, and n_buffers members of the returned "struct + * ofp_switch_features". The caller must free the returned buffer (with + * ofpbuf_delete()) when it is no longer needed. + * + * On error, returns an OpenFlow error code (as constructed by ofp_mkerr()) and + * sets '*featuresp' to NULL. */ +int +wdp_get_features(const struct wdp *wdp, struct ofpbuf **featuresp) +{ + int error = wdp->wdp_class->get_features(wdp, featuresp); + if (error) { + *featuresp = NULL; + } + return error; +} + +/* Retrieves statistics for 'wdp' into 'stats'. Returns 0 if successful, + * otherwise a positive errno value. 
On error, clears 'stats' to + * all-bits-zero. */ +int +wdp_get_wdp_stats(const struct wdp *wdp, struct wdp_stats *stats) +{ + int error = wdp->wdp_class->get_stats(wdp, stats); + if (error) { + memset(stats, 0, sizeof *stats); + } + log_operation(wdp, "get_stats", error); + return error; +} + +/* Appends to 'stats' one or more 'struct ofp_table_stats' structures that + * represent the tables maintained by 'wdp'. Returns 0 if successful, + * otherwise an OpenFlow error code constructed with ofp_mkerr(). */ +int +wdp_get_table_stats(const struct wdp *wdp, struct ofpbuf *stats) +{ + int error = wdp->wdp_class->get_table_stats(wdp, stats); + if (!error) { + assert(stats->size > sizeof(struct ofp_stats_reply)); + assert(((stats->size - sizeof(struct ofp_stats_reply)) + % sizeof(struct ofp_table_stats)) == 0); + } + log_operation(wdp, "get_table_stats", error); + return error; +} + +/* Retrieves the current IP fragment handling policy for 'wdp' into + * '*drop_frags': true indicates that fragments are dropped, false indicates + * that fragments are treated in the same way as other IP packets (except that + * the L4 header cannot be read). Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_get_drop_frags(const struct wdp *wdp, bool *drop_frags) +{ + int error = wdp->wdp_class->get_drop_frags(wdp, drop_frags); + if (error) { + *drop_frags = false; + } + log_operation(wdp, "get_drop_frags", error); + return error; +} + +/* Changes 'wdp''s treatment of IP fragments to 'drop_frags', whose meaning is + * the same as for the get_drop_frags member function. Returns 0 if + * successful, otherwise a positive errno value. EOPNOTSUPP indicates that + * 'wdp''s fragment dropping policy is not configurable. */ +int +wdp_set_drop_frags(struct wdp *wdp, bool drop_frags) +{ + int error; + error = (wdp->wdp_class->set_drop_frags + ? 
wdp->wdp_class->set_drop_frags(wdp, drop_frags) + : EOPNOTSUPP); + log_operation(wdp, "set_drop_frags", error); + return error; +} + +/* Clears the contents of 'port'. */ +void +wdp_port_clear(struct wdp_port *port) +{ + memset(port, 0, sizeof *port); +} + +/* Makes a deep copy of 'old' in 'port'. The caller may free 'port''s data + * with wdp_port_free(). */ +void +wdp_port_copy(struct wdp_port *port, const struct wdp_port *old) +{ + port->netdev = old->netdev ? netdev_reopen(old->netdev) : NULL; + port->opp = old->opp; + port->devname = old->devname ? xstrdup(old->devname) : NULL; + port->internal = old->internal; +} + +/* Frees the data that 'port' points to (but not 'port' itself). */ +void +wdp_port_free(struct wdp_port *port) +{ + if (port) { + netdev_close(port->netdev); + free(port->devname); + } +} + +/* Frees the data that each of the 'n' ports in 'ports' points to, and then + * frees 'ports' itself. */ +void +wdp_port_array_free(struct wdp_port *ports, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + wdp_port_free(&ports[i]); + } + free(ports); +} + +/* Attempts to add 'devname' as a port on 'wdp': + * + * - If 'internal' is true, attempts to create a new internal port (a virtual + * port implemented in software) by that name. + * + * - If 'internal' is false, 'devname' must name an existing network device. + * + * If successful, returns 0 and sets '*port_nop' to the new port's OpenFlow + * port number (if 'port_nop' is non-null). On failure, returns a positive + * errno value and sets '*port_nop' to OFPP_NONE (if 'port_nop' is non-null). + * + * Some wildcarded datapaths might have fixed sets of ports. For these + * datapaths this function will always fail. + * + * Possible error return values include: + * + * - ENODEV: No device named 'devname' exists (if 'internal' is false). + * + * - EEXIST: A device named 'devname' already exists (if 'internal' is true). + * + * - EINVAL: Device 'devname' is not supported as part of a datapath (e.g. 
it + * is not an Ethernet device), or 'devname' is too long for a network + * device name (if 'internal' is true) + * + * - EFBIG: The datapath already has as many ports as it can support. + * + * - EOPNOTSUPP: 'wdp' has a fixed set of ports. + */ +int +wdp_port_add(struct wdp *wdp, const char *devname, + bool internal, uint16_t *port_nop) +{ + uint16_t port_no; + int error; + + COVERAGE_INC(wdp_port_add); + + error = (wdp->wdp_class->port_add + ? wdp->wdp_class->port_add(wdp, devname, internal, &port_no) + : EOPNOTSUPP); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: added %s as port %"PRIu16, + wdp_name(wdp), devname, port_no); + } else { + VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", + wdp_name(wdp), devname, strerror(error)); + port_no = OFPP_NONE; + } + if (port_nop) { + *port_nop = port_no; + } + return error; +} + +/* Attempts to remove 'wdp''s port numbered 'port_no'. Returns 0 if + * successful, otherwise a positive errno value. + * + * Some wildcarded datapaths might have fixed sets of ports. For these + * datapaths this function will always fail. + * + * Possible error return values include: + * + * - EINVAL: 'port_no' is outside the valid range, or this particular port is + * not removable (e.g. it is the local port). + * + * - ENOENT: 'wdp' currently has no port numbered 'port_no'. + * + * - EOPNOTSUPP: 'wdp' has a fixed set of ports. + */ +int +wdp_port_del(struct wdp *wdp, uint16_t port_no) +{ + int error; + + COVERAGE_INC(wdp_port_del); + + error = (wdp->wdp_class->port_del + ? wdp->wdp_class->port_del(wdp, port_no) + : EOPNOTSUPP); + log_operation(wdp, "port_del", error); + return error; +} + +/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and + * initializes 'port' with port details. On failure, returns a positive errno + * value and clears the contents of 'port' (with wdp_port_clear()). + * + * The caller must not modify or free the returned wdp_port. 
Calling + * wdp_run() or wdp_port_poll() may free the returned wdp_port. + * + * Possible error return values include: + * + * - EINVAL: 'port_no' is outside the valid range. + * + * - ENOENT: 'wdp' currently has no port numbered 'port_no'. + */ +int +wdp_port_query_by_number(const struct wdp *wdp, uint16_t port_no, + struct wdp_port *port) +{ + int error; + + error = wdp->wdp_class->port_query_by_number(wdp, port_no, port); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: port %"PRIu16" is device %s", + wdp_name(wdp), port_no, port->devname); + } else { + wdp_port_clear(port); + VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s", + wdp_name(wdp), port_no, strerror(error)); + } + return error; +} + +/* Same as wdp_port_query_by_number() except that it look for a port named + * 'devname' in 'wdp'. + * + * Possible error return values include: + * + * - ENODEV: No device named 'devname' exists. + * + * - ENOENT: 'devname' exists but it is not attached as a port on 'wdp'. + */ +int +wdp_port_query_by_name(const struct wdp *wdp, const char *devname, + struct wdp_port *port) +{ + int error = wdp->wdp_class->port_query_by_name(wdp, devname, port); + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: device %s is on port %"PRIu16, + wdp_name(wdp), devname, port->opp.port_no); + } else { + wdp_port_clear(port); + + /* Log level is DBG here because all the current callers are interested + * in whether 'wdp' actually has a port 'devname', so that it's not + * an issue worth logging if it doesn't. */ + VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s", + wdp_name(wdp), devname, strerror(error)); + } + return error; +} + +/* Looks up port number 'port_no' in 'wdp'. On success, returns 0 and stores + * a copy of the port's name in '*namep'. On failure, returns a positive errno + * value and stores NULL in '*namep'. + * + * Error return values are the same as for wdp_port_query_by_name(). + * + * The caller is responsible for freeing '*namep' (with free()). 
*/ +int +wdp_port_get_name(struct wdp *wdp, uint16_t port_no, char **namep) +{ + struct wdp_port port; + int error; + + error = wdp_port_query_by_number(wdp, port_no, &port); + *namep = port.devname; + port.devname = NULL; + wdp_port_free(&port); + + return error; +} + +/* Obtains a list of all the ports in 'wdp', in no particular order. + * + * If successful, returns 0 and sets '*portsp' to point to an array of struct + * wdp_port and '*n_portsp' to the number of elements in the array. On + * failure, returns a positive errno value and sets '*portsp' to NULL and + * '*n_portsp' to 0. + * + * The caller is responsible for freeing '*portsp' and the individual wdp_port + * structures, e.g. with wdp_port_array_free(). */ +int +wdp_port_list(const struct wdp *wdp, + struct wdp_port **portsp, size_t *n_portsp) +{ + int error; + + error = wdp->wdp_class->port_list(wdp, portsp, n_portsp); + if (error) { + *portsp = NULL; + *n_portsp = 0; + } + log_operation(wdp, "port_list", error); + return error; +} + +/* Updates the configuration for the port number 'port_no' within 'wdp' to + * 'config', which is a set of OpenFlow OFPPC_* constants in host byte order. + * Returns 0 if successful, otherwise an OpenFlow error code constructed with + * ofp_mkerr(). */ +int +wdp_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config) +{ + return wdp->wdp_class->port_set_config(wdp, port_no, config); +} + +/* Polls for changes in the set of ports in 'wdp' since the last call to this + * function or, if this is the first call, since 'wdp' was opened. For each + * change, calls 'cb' passing 'aux' and: + * + * - For a port that has been added, OFPPR_ADD as 'reason' and the new port's + * "struct ofp_phy_port" as 'opp'. + * + * - For a port that has been removed, OFPPR_DELETE as 'reason' and the + * deleted port's former "struct ofp_phy_port" as 'opp'.
+ * + * - For a port whose configuration has changed, OFPPR_MODIFY as 'reason' and + * the modified port's new "struct ofp_phy_port" as 'opp'. + * + * 'opp' is in *host* byte order. + * + * Normally returns 0. May also return a positive errno value to indicate + * that something has gone wrong. + */ +int +wdp_port_poll(struct wdp *wdp, wdp_port_poll_cb_func *cb, void *aux) +{ + return wdp->wdp_class->port_poll(wdp, cb, aux); +} + +/* Arranges for the poll loop to wake up when 'port_poll' will call its + * callback. */ +int +wdp_port_poll_wait(const struct wdp *wdp) +{ + return wdp->wdp_class->port_poll_wait(wdp); +} + +/* Deletes all flows from 'wdp'. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_flow_flush(struct wdp *wdp) +{ + int error; + + COVERAGE_INC(wdp_flow_flush); + + error = wdp->wdp_class->flow_flush(wdp); + log_operation(wdp, "flow_flush", error); + return error; +} + +/* If 'wdp' contains exactly one flow exactly equal to 'flow' in one of the + * tables in the bit-mask in 'include', returns that flow. Otherwise (if there + * is no match or more than one match), returns null. + * + * A flow in table 'table_id' is a candidate for matching if 'include & (1u << + * table_id)' is nonzero. */ +struct wdp_rule * +wdp_flow_get(struct wdp *wdp, const flow_t *flow, unsigned int include) +{ + return wdp->wdp_class->flow_get(wdp, flow, include); +} + +struct wdp_rule * +wdp_flow_match(struct wdp *wdp, const flow_t *flow) +{ + return wdp->wdp_class->flow_match(wdp, flow); +} + +/* Iterates through all of the flows in 'wdp''s flow table, passing each flow + * that matches the specified search criteria to 'callback' along with 'aux'. + * + * Flows are filtered out in two ways. First, based on the bit-mask in + * 'include': wdp_rule 'wr' is included only if 'include & (1u << + * wr->ofp_table_id)' is nonzero. 
+ * + * Flows are also filtered out based on 'target': on a field-by-field basis, a + * flow is included if 'target' wildcards that field or if the flow and + * 'target' both have the same exact value for the field. A flow is excluded + * if any field does not match based on these criteria. + * + * Ignores 'target->priority'. + * + * 'callback' is allowed to delete the rule that is passed as its argument. It + * may modify any flow in 'wdp', e.g. changing their actions. 'callback' must + * not delete flows from 'wdp' other than its argument flow, nor may it insert + * new flows into 'wdp'. */ +void +wdp_flow_for_each_match(const struct wdp *wdp, const flow_t *target, + unsigned int include, + wdp_flow_cb_func *callback, void *aux) +{ + wdp->wdp_class->flow_for_each_match(wdp, target, include, callback, aux); +} + +int +wdp_flow_get_stats(const struct wdp *wdp, const struct wdp_rule *rule, + struct wdp_flow_stats *stats) +{ + int error = wdp->wdp_class->flow_get_stats(wdp, rule, stats); + if (error) { + memset(stats, 0, sizeof *stats); + } + return error; +} + +bool +wdp_flow_overlaps(const struct wdp *wdp, const flow_t *flow) +{ + return wdp->wdp_class->flow_overlaps(wdp, flow); +} + +int +wdp_flow_put(struct wdp *wdp, struct wdp_flow_put *put, + struct wdp_flow_stats *old_stats, struct wdp_rule **rulep) +{ + int error = wdp->wdp_class->flow_put(wdp, put, old_stats, rulep); + if (error) { + if (old_stats) { + memset(old_stats, 0, sizeof *old_stats); + } + if (rulep) { + *rulep = NULL; + } + } + return error; +} + +int +wdp_flow_delete(struct wdp *wdp, struct wdp_rule *rule, + struct wdp_flow_stats *final_stats) +{ + int error = wdp->wdp_class->flow_delete(wdp, rule, final_stats); + if (error && final_stats) { + memset(final_stats, 0, sizeof *final_stats); + } + return error; +} + +int +wdp_flow_inject(struct wdp *wdp, struct wdp_rule *rule, + uint16_t in_port, const struct ofpbuf *packet) +{ + return wdp->wdp_class->flow_inject(wdp, rule, in_port, packet); +} + 
+int +wdp_execute(struct wdp *wdp, uint16_t in_port, + const union ofp_action actions[], size_t n_actions, + const struct ofpbuf *buf) +{ + int error; + + COVERAGE_INC(wdp_execute); + if (n_actions > 0) { + error = wdp->wdp_class->execute(wdp, in_port, actions, + n_actions, buf); + } else { + error = 0; + } + return error; +} + +/* Retrieves 'wdp''s "listen mask" into '*listen_mask'. Each bit set in + * '*listen_mask' indicates that wdp_recv() will receive messages of the + * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_recv_get_mask(const struct wdp *wdp, int *listen_mask) +{ + int error = wdp->wdp_class->recv_get_mask(wdp, listen_mask); + if (error) { + *listen_mask = 0; + } + log_operation(wdp, "recv_get_mask", error); + return error; +} + +/* Sets 'wdp''s "listen mask" to 'listen_mask'. Each bit set in + * 'listen_mask' requests that wdp_recv() receive messages of the + * corresponding WDP_CHAN_* type. Returns 0 if successful, otherwise a + * positive errno value. */ +int +wdp_recv_set_mask(struct wdp *wdp, int listen_mask) +{ + int error = wdp->wdp_class->recv_set_mask(wdp, listen_mask); + log_operation(wdp, "recv_set_mask", error); + return error; +} + +/* Retrieve the sFlow sampling probability. '*probability' is expressed as the + * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is + * the probability of sampling a given packet. + * + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP + * indicates that 'wdp' does not support sFlow sampling. */ +int +wdp_get_sflow_probability(const struct wdp *wdp, uint32_t *probability) +{ + int error = (wdp->wdp_class->get_sflow_probability + ? wdp->wdp_class->get_sflow_probability(wdp, probability) + : EOPNOTSUPP); + if (error) { + *probability = 0; + } + log_operation(wdp, "get_sflow_probability", error); + return error; +} + +/* Set the sFlow sampling probability.
'probability' is expressed as the + * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is + * the probability of sampling a given packet. + * + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP + * indicates that 'wdp' does not support sFlow sampling. */ +int +wdp_set_sflow_probability(struct wdp *wdp, uint32_t probability) +{ + int error = (wdp->wdp_class->set_sflow_probability + ? wdp->wdp_class->set_sflow_probability(wdp, probability) + : EOPNOTSUPP); + log_operation(wdp, "set_sflow_probability", error); + return error; +} + +/* Attempts to receive a message from 'wdp'. If successful, stores the + * message into '*packet'. Only messages of the types selected with + * wdp_recv_set_mask() will ordinarily be received (but if a message type + * is enabled and then later disabled, some stragglers might pop up). + * + * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN + * if no message is immediately available. */ +int +wdp_recv(struct wdp *wdp, struct wdp_packet *packet) +{ + int error = wdp->wdp_class->recv(wdp, packet); + if (!error) { + /* XXX vlog_dbg received packet */ + } else { + memset(packet, 0, sizeof *packet); + packet->channel = -1; + } + return error; +} + +/* Discards all messages that would otherwise be received by wdp_recv() on + * 'wdp'. Returns 0 if successful, otherwise a positive errno value. */ +int +wdp_recv_purge(struct wdp *wdp) +{ + COVERAGE_INC(wdp_purge); + return wdp->wdp_class->recv_purge(wdp); +} + +/* Arranges for the poll loop to wake up when 'wdp' has a message queued to be + * received with wdp_recv(). */ +void +wdp_recv_wait(struct wdp *wdp) +{ + wdp->wdp_class->recv_wait(wdp); +} + +/* Obtains the NetFlow engine type and engine ID for 'wdp' into '*engine_type' + * and '*engine_id', respectively.
*/ +void +wdp_get_netflow_ids(const struct wdp *wdp, + uint8_t *engine_type, uint8_t *engine_id) +{ + *engine_type = wdp->netflow_engine_type; + *engine_id = wdp->netflow_engine_id; +} + +/* ovs-vswitchd interface. + * + * This needs to be redesigned, because it only makes sense for wdp-xflow. The + * ofhooks are currently the key to implementing the OFPP_NORMAL feature of + * ovs-vswitchd. */ + +/* Sets the ofhooks for 'wdp' to 'ofhooks' with the accompanying 'aux' value. + * Only the xflow implementation of wdp is expected to implement this function; + * other implementations should just set it to NULL. + * + * The ofhooks are currently the key to implementing the OFPP_NORMAL feature of + * ovs-vswitchd. This design is not adequate for the long term; it needs to be + * redone. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int +wdp_set_ofhooks(struct wdp *wdp, const struct ofhooks *ofhooks, void *aux) +{ + int error; + error = (wdp->wdp_class->set_ofhooks + ? wdp->wdp_class->set_ofhooks(wdp, ofhooks, aux) + : EOPNOTSUPP); + log_operation(wdp, "set_ofhooks", error); + return error; +} + +/* Tell 'wdp' to revalidate all the flows that match 'tag'. + * + * This needs to be redesigned, because it only makes sense for wdp-xflow. + * Other implementations cannot practically use this interface and should just + * set this to NULL. */ +void +wdp_revalidate(struct wdp *wdp, tag_type tag) +{ + if (wdp->wdp_class->revalidate) { + wdp->wdp_class->revalidate(wdp, tag); + } +} + +/* Tell 'wdp' to revalidate every flow. (This is not the same as calling + * 'revalidate' with all-1-bits for 'tag' because it also revalidates flows + * that do not have any tag at all.) + * + * This needs to be redesigned, because it only makes sense for wdp-xflow. + * Other implementations cannot practically use this interface and should just + * set this to NULL. 
*/ +void +wdp_revalidate_all(struct wdp *wdp) +{ + if (wdp->wdp_class->revalidate_all) { + wdp->wdp_class->revalidate_all(wdp); + } +} + +/* Returns a copy of 'old'. The packet's payload, if any, is copied as well, + * but if it is longer than 'trim' bytes it is truncated to that length. */ +struct wdp_packet * +wdp_packet_clone(const struct wdp_packet *old, size_t trim) +{ + struct wdp_packet *new = xmemdup(old, sizeof *old); + if (old->payload) { + new->payload = ofpbuf_clone_data(old->payload->data, + MIN(trim, old->payload->size)); + } + return new; +} + +void +wdp_packet_destroy(struct wdp_packet *packet) +{ + if (packet) { + ofpbuf_delete(packet->payload); + free(packet); + } +} + +void +wdp_init(struct wdp *wdp, const struct wdp_class *wdp_class, + const char *name, + uint8_t netflow_engine_type, uint8_t netflow_engine_id) +{ + wdp->wdp_class = wdp_class; + wdp->base_name = xstrdup(name); + wdp->full_name = xasprintf("%s@%s", wdp_class->type, name); + wdp->netflow_engine_type = netflow_engine_type; + wdp->netflow_engine_id = netflow_engine_id; +} + +/* Undoes the results of initialization. + * + * Normally this function only needs to be called from wdp_close(). + * However, it may be called by providers due to an error on opening + * that occurs after initialization. In this case wdp_close() would + * never be called.
*/ +void +wdp_uninit(struct wdp *wdp, bool close) +{ + char *base_name = wdp->base_name; + char *full_name = wdp->full_name; + + if (close) { + wdp->wdp_class->close(wdp); + } + + free(base_name); + free(full_name); +} + +static void +log_operation(const struct wdp *wdp, const char *operation, int error) +{ + if (!error) { + VLOG_DBG_RL(&wdpmsg_rl, "%s: %s success", wdp_name(wdp), operation); + } else { + VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)", + wdp_name(wdp), operation, strerror(error)); + } +} diff --cc utilities/ovs-dpctl.c index 59bfa389b,fe44f27e2..e1ae86792 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@@ -39,10 -40,9 +39,10 @@@ #include "svec.h" #include "timeval.h" #include "util.h" + #include "vlog.h" +#include "xfif.h" - #include "vlog.h" - #define THIS_MODULE VLM_dpctl + VLOG_DEFINE_THIS_MODULE(dpctl) static const struct command all_commands[]; diff --cc utilities/ovs-ofctl.c index 4eeec5613,54183e58d..ac8fdb70c --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@@ -49,11 -50,10 +49,11 @@@ #include "timeval.h" #include "util.h" #include "vconn.h" + #include "vlog.h" +#include "xfif.h" #include "xtoxll.h" - #include "vlog.h" - #define THIS_MODULE VLM_ofctl + VLOG_DEFINE_THIS_MODULE(ofctl) #define DEFAULT_IDLE_TIMEOUT 60 diff --cc utilities/ovs-openflowd.c index 263bd9bec,bb77d5896..d21585efd --- a/utilities/ovs-openflowd.c +++ b/utilities/ovs-openflowd.c @@@ -44,10 -44,9 +44,10 @@@ #include "unixctl.h" #include "util.h" #include "vconn.h" + #include "vlog.h" +#include "xfif.h" - #include "vlog.h" - #define THIS_MODULE VLM_openflowd + VLOG_DEFINE_THIS_MODULE(openflowd) /* Settings that may be configured by the user. 
*/ struct ofsettings { diff --cc vswitchd/bridge.c index 836c8b18e,be0e87581..2bc263dfd --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@@ -61,7 -63,7 +62,8 @@@ #include "vconn.h" #include "vswitchd/vswitch-idl.h" #include "xenserver.h" + #include "vlog.h" +#include "xfif.h" #include "xtoxll.h" #include "sflow_api.h" @@@ -640,15 -643,13 +642,13 @@@ bridge_reconfigure(const struct ovsrec_ struct shash_node *node; /* Get the set of interfaces currently in this datapath. */ - dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); + xfif_port_list(br->xfif, &xfif_ports, &n_xfif_ports); shash_init(&cur_ifaces); - for (i = 0; i < n_dpif_ports; i++) { - const char *name = dpif_ports[i].devname; + for (i = 0; i < n_xfif_ports; i++) { + const char *name = xfif_ports[i].devname; - if (!shash_find(&cur_ifaces, name)) { - shash_add(&cur_ifaces, name, NULL); - } + shash_add_once(&cur_ifaces, name, NULL); } - free(dpif_ports); + free(xfif_ports); /* Get the set of interfaces we want on this datapath. */ bridge_get_all_ifaces(br, &want_ifaces); diff --cc vswitchd/ovs-vswitchd.c index 92379f930,541cdcbc8..b76f47274 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@@ -45,11 -45,10 +45,11 @@@ #include "unixctl.h" #include "util.h" #include "vconn.h" + #include "vlog.h" #include "vswitchd/vswitch-idl.h" +#include "xfif.h" - #include "vlog.h" - #define THIS_MODULE VLM_vswitchd + VLOG_DEFINE_THIS_MODULE(vswitchd) static unixctl_cb_func ovs_vswitchd_exit;