From: Ben Pfaff Date: Tue, 22 Jun 2010 18:07:47 +0000 (-0700) Subject: Merge remote branch 'origin/master' into wdp-merge X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=060a2a4480e2f3e8fb2a3c6dfd4a468074a229aa;p=sliver-openvswitch.git Merge remote branch 'origin/master' into wdp-merge Conflicts: datapath/datapath.c datapath/vport.c datapath/vport.h include/openvswitch/automake.mk include/openvswitch/datapath-protocol.h lib/dpif-netdev.c lib/vconn.c lib/vconn.h lib/vlog-modules.def lib/xfif-provider.h ofproto/fail-open.c ofproto/ofproto-sflow.c ofproto/ofproto.c --- 060a2a4480e2f3e8fb2a3c6dfd4a468074a229aa diff --cc datapath/actions.c index baa58b7db,f7e51d925..ca9234c45 --- a/datapath/actions.c +++ b/datapath/actions.c @@@ -504,10 -519,18 +511,18 @@@ int execute_actions(struct datapath *dp skb = set_nw_tos(skb, key, &a->nw_tos, gfp); break; - case ODPAT_SET_TP_SRC: - case ODPAT_SET_TP_DST: + case XFLOWAT_SET_TP_SRC: + case XFLOWAT_SET_TP_DST: skb = set_tp_port(skb, key, &a->tp_port, gfp); break; + - case ODPAT_SET_PRIORITY: ++ case XFLOWAT_SET_PRIORITY: + skb->priority = a->priority.priority; + break; + - case ODPAT_POP_PRIORITY: ++ case XFLOWAT_POP_PRIORITY: + skb->priority = priority; + break; } if (!skb) return -ENOMEM; diff --cc datapath/datapath.c index d0a043130,c715f0ec7..6ae33a46d --- a/datapath/datapath.c +++ b/datapath/datapath.c @@@ -362,10 -363,10 +363,10 @@@ static int new_dp_port(struct datapath if (!vport) { vport_lock(); - if (odp_port->flags & ODP_PORT_INTERNAL) - vport = vport_add(odp_port->devname, "internal", NULL); + if (xflow_port->flags & XFLOW_PORT_INTERNAL) - vport = __vport_add(xflow_port->devname, "internal", NULL); ++ vport = vport_add(xflow_port->devname, "internal", NULL); else - vport = __vport_add(xflow_port->devname, "netdev", NULL); - vport = vport_add(odp_port->devname, "netdev", NULL); ++ vport = vport_add(xflow_port->devname, "netdev", NULL); vport_unlock(); @@@ -1615,36 -1598,40 +1595,40 @@@ static long openvswitch_ioctl(struct fi err = detach_port(dp_idx, port_no); goto exit; - case ODP_VPORT_ADD: - err = vport_user_add((struct odp_vport_add __user *)argp); + case XFLOW_VPORT_ADD: - err = vport_add((struct xflow_vport_add __user *)argp); ++ err = vport_user_add((struct xflow_vport_add __user *)argp); goto exit; - case ODP_VPORT_MOD: - err = vport_user_mod((struct odp_vport_mod __user *)argp); + case XFLOW_VPORT_MOD: - err = vport_mod((struct xflow_vport_mod __user *)argp); ++ err = vport_user_mod((struct xflow_vport_mod __user *)argp); goto exit; - case ODP_VPORT_DEL: + case XFLOW_VPORT_DEL: - err = vport_del((char __user *)argp); + err = vport_user_del((char __user *)argp); goto exit; - case ODP_VPORT_STATS_GET: - err = vport_user_stats_get((struct odp_vport_stats_req __user *)argp); + case XFLOW_VPORT_STATS_GET: - err = vport_stats_get((struct xflow_vport_stats_req __user *)argp); ++ err = vport_user_stats_get((struct xflow_vport_stats_req __user *)argp); + goto exit; + - case ODP_VPORT_STATS_SET: - err = vport_user_stats_set((struct odp_vport_stats_req __user *)argp); ++ case XFLOW_VPORT_STATS_SET: ++ err = vport_user_stats_set((struct xflow_vport_stats_req __user *)argp); goto exit; - case ODP_VPORT_ETHER_GET: - err = vport_user_ether_get((struct odp_vport_ether __user *)argp); + case XFLOW_VPORT_ETHER_GET: - err = vport_ether_get((struct xflow_vport_ether __user *)argp); ++ err = vport_user_ether_get((struct xflow_vport_ether __user *)argp); goto exit; - case ODP_VPORT_ETHER_SET: - err = vport_user_ether_set((struct odp_vport_ether __user 
*)argp); + case XFLOW_VPORT_ETHER_SET: - err = vport_ether_set((struct xflow_vport_ether __user *)argp); ++ err = vport_user_ether_set((struct xflow_vport_ether __user *)argp); goto exit; - case ODP_VPORT_MTU_GET: - err = vport_user_mtu_get((struct odp_vport_mtu __user *)argp); + case XFLOW_VPORT_MTU_GET: - err = vport_mtu_get((struct xflow_vport_mtu __user *)argp); ++ err = vport_user_mtu_get((struct xflow_vport_mtu __user *)argp); goto exit; - case ODP_VPORT_MTU_SET: - err = vport_user_mtu_set((struct odp_vport_mtu __user *)argp); + case XFLOW_VPORT_MTU_SET: - err = vport_mtu_set((struct xflow_vport_mtu __user *)argp); ++ err = vport_user_mtu_set((struct xflow_vport_mtu __user *)argp); goto exit; } @@@ -1986,31 -1974,32 +1970,32 @@@ static long openvswitch_compat_ioctl(st /* Ioctls that don't need any translation at all. */ return openvswitch_ioctl(f, cmd, argp); - case ODP_DP_CREATE: - case ODP_PORT_ATTACH: - case ODP_PORT_DETACH: - case ODP_VPORT_DEL: - case ODP_VPORT_MTU_SET: - case ODP_VPORT_MTU_GET: - case ODP_VPORT_ETHER_SET: - case ODP_VPORT_ETHER_GET: - case ODP_VPORT_STATS_SET: - case ODP_VPORT_STATS_GET: - case ODP_DP_STATS: - case ODP_GET_DROP_FRAGS: - case ODP_SET_DROP_FRAGS: - case ODP_SET_LISTEN_MASK: - case ODP_GET_LISTEN_MASK: - case ODP_SET_SFLOW_PROBABILITY: - case ODP_GET_SFLOW_PROBABILITY: - case ODP_PORT_QUERY: + case XFLOW_DP_CREATE: + case XFLOW_PORT_ATTACH: + case XFLOW_PORT_DETACH: + case XFLOW_VPORT_DEL: + case XFLOW_VPORT_MTU_SET: + case XFLOW_VPORT_MTU_GET: + case XFLOW_VPORT_ETHER_SET: + case XFLOW_VPORT_ETHER_GET: ++ case XFLOW_VPORT_STATS_SET: + case XFLOW_VPORT_STATS_GET: + case XFLOW_DP_STATS: + case XFLOW_GET_DROP_FRAGS: + case XFLOW_SET_DROP_FRAGS: + case XFLOW_SET_LISTEN_MASK: + case XFLOW_GET_LISTEN_MASK: + case XFLOW_SET_SFLOW_PROBABILITY: + case XFLOW_GET_SFLOW_PROBABILITY: + case XFLOW_PORT_QUERY: /* Ioctls that just need their pointer argument extended. */ return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp)); - case ODP_VPORT_ADD32: + case XFLOW_VPORT_ADD32: - return compat_vport_add(compat_ptr(argp)); + return compat_vport_user_add(compat_ptr(argp)); - case ODP_VPORT_MOD32: + case XFLOW_VPORT_MOD32: - return compat_vport_mod(compat_ptr(argp)); + return compat_vport_user_mod(compat_ptr(argp)); } dp = get_dp_locked(dp_idx); diff --cc datapath/vport-internal_dev.c index d8e57fef8,c4937ed3d..b50293395 --- a/datapath/vport-internal_dev.c +++ b/datapath/vport-internal_dev.c @@@ -47,26 -28,32 +28,32 @@@ static inline struct internal_dev *inte return netdev_priv(netdev); } - static struct net_device_stats *internal_dev_get_stats(struct net_device *netdev) + /* This function is only called by the kernel network layer. It is not a vport + * get_stats() function. If a vport get_stats() function is defined that + * results in this being called it will cause infinite recursion. 
*/ + static struct net_device_stats *internal_dev_sys_stats(struct net_device *netdev) { - struct internal_dev *internal_dev = internal_dev_priv(netdev); - struct net_device_stats *stats; - int i; - - stats = &internal_dev->stats; - stats->rx_bytes = internal_dev->extra_stats.rx_bytes; - stats->rx_packets = internal_dev->extra_stats.rx_packets; - stats->tx_bytes = internal_dev->extra_stats.tx_bytes; - stats->tx_packets = internal_dev->extra_stats.tx_packets; - for_each_possible_cpu(i) { - const struct pcpu_lstats *lb_stats; - - lb_stats = per_cpu_ptr(internal_dev->lstats, i); - stats->rx_bytes += lb_stats->rx_bytes; - stats->rx_packets += lb_stats->rx_packets; - stats->tx_bytes += lb_stats->tx_bytes; - stats->tx_packets += lb_stats->tx_packets; + struct vport *vport = internal_dev_get_vport(netdev); + struct net_device_stats *stats = &internal_dev_priv(netdev)->stats; + + if (vport) { - struct odp_vport_stats vport_stats; ++ struct xflow_vport_stats vport_stats; + + vport_get_stats(vport, &vport_stats); + + /* The tx and rx stats need to be swapped because the switch + * and host OS have opposite perspectives. */ + stats->rx_packets = vport_stats.tx_packets; + stats->tx_packets = vport_stats.rx_packets; + stats->rx_bytes = vport_stats.tx_bytes; + stats->tx_bytes = vport_stats.rx_bytes; + stats->rx_errors = vport_stats.tx_errors; + stats->tx_errors = vport_stats.rx_errors; + stats->rx_dropped = vport_stats.tx_dropped; + stats->tx_dropped = vport_stats.rx_dropped; + stats->collisions = vport_stats.collisions; } + return stats; } diff --cc datapath/vport-netdev.c index 9e0ecefa5,72d2928d5..2a46724b0 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@@ -99,6 -100,16 +100,16 @@@ netdev_create(const char *name, const v goto error_put; } + /* If we are using the vport stats layer initialize it to the current + * values so we are roughly consistent with the device stats. */ + if (USE_VPORT_STATS) { - struct odp_vport_stats stats; ++ struct xflow_vport_stats stats; + + err = netdev_get_stats(vport, &stats); + if (!err) + vport_set_stats(vport, &stats); + } + return vport; error_put: diff --cc datapath/vport.c index 1cecfad4b,38c71476e..cfdd3e7dc --- a/datapath/vport.c +++ b/datapath/vport.c @@@ -182,17 -174,8 +174,8 @@@ vport_exit(void kfree(dev_table); } - /** - * vport_add - add vport device (for userspace callers) - * - * @uvport_config: New port configuration. - * - * Creates a new vport with the specified configuration (which is dependent - * on device type). This function is for userspace callers and assumes no - * locks are held. - */ static int -do_vport_add(struct odp_vport_add *vport_config) +do_vport_add(struct xflow_vport_add *vport_config) { struct vport *vport; int err = 0; @@@ -221,12 -204,21 +204,21 @@@ out return err; } + /** + * vport_user_add - add vport device (for userspace callers) + * + * @uvport_config: New port configuration. + * + * Creates a new vport with the specified configuration (which is dependent + * on device type). This function is for userspace callers and assumes no + * locks are held. 
+ */ int - vport_add(const struct xflow_vport_add __user *uvport_config) -vport_user_add(const struct odp_vport_add __user *uvport_config) ++vport_user_add(const struct xflow_vport_add __user *uvport_config) { - struct odp_vport_add vport_config; + struct xflow_vport_add vport_config; - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_add))) + if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_add))) return -EFAULT; return do_vport_add(&vport_config); @@@ -234,12 -226,12 +226,12 @@@ #ifdef CONFIG_COMPAT int - compat_vport_add(struct compat_xflow_vport_add *ucompat) -compat_vport_user_add(struct compat_odp_vport_add *ucompat) ++compat_vport_user_add(struct compat_xflow_vport_add *ucompat) { - struct compat_odp_vport_add compat; - struct odp_vport_add vport_config; + struct compat_xflow_vport_add compat; + struct xflow_vport_add vport_config; - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_add))) + if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_add))) return -EFAULT; memcpy(vport_config.port_type, compat.port_type, VPORT_TYPE_SIZE); @@@ -250,17 -242,8 +242,8 @@@ } #endif - /** - * vport_mod - modify existing vport device (for userspace callers) - * - * @uvport_config: New configuration for vport - * - * Modifies an existing device with the specified configuration (which is - * dependent on device type). This function is for userspace callers and - * assumes no locks are held. - */ static int -do_vport_mod(struct odp_vport_mod *vport_config) +do_vport_mod(struct xflow_vport_mod *vport_config) { struct vport *vport; int err; @@@ -284,12 -267,21 +267,21 @@@ out return err; } + /** + * vport_user_mod - modify existing vport device (for userspace callers) + * + * @uvport_config: New configuration for vport + * + * Modifies an existing device with the specified configuration (which is + * dependent on device type). This function is for userspace callers and + * assumes no locks are held. + */ int - vport_mod(const struct xflow_vport_mod __user *uvport_config) -vport_user_mod(const struct odp_vport_mod __user *uvport_config) ++vport_user_mod(const struct xflow_vport_mod __user *uvport_config) { - struct odp_vport_mod vport_config; + struct xflow_vport_mod vport_config; - if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_mod))) + if (copy_from_user(&vport_config, uvport_config, sizeof(struct xflow_vport_mod))) return -EFAULT; return do_vport_mod(&vport_config); @@@ -297,12 -289,12 +289,12 @@@ #ifdef CONFIG_COMPAT int - compat_vport_mod(struct compat_xflow_vport_mod *ucompat) -compat_vport_user_mod(struct compat_odp_vport_mod *ucompat) ++compat_vport_user_mod(struct compat_xflow_vport_mod *ucompat) { - struct compat_odp_vport_mod compat; - struct odp_vport_mod vport_config; + struct compat_xflow_vport_mod compat; + struct xflow_vport_mod vport_config; - if (copy_from_user(&compat, ucompat, sizeof(struct compat_odp_vport_mod))) + if (copy_from_user(&compat, ucompat, sizeof(struct compat_xflow_vport_mod))) return -EFAULT; memcpy(vport_config.devname, compat.devname, IFNAMSIZ); @@@ -384,9 -376,9 +376,9 @@@ out * function is for userspace callers and assumes no locks are held. 
  */
 int
- vport_stats_get(struct xflow_vport_stats_req __user *ustats_req)
-vport_user_stats_get(struct odp_vport_stats_req __user *ustats_req)
++vport_user_stats_get(struct xflow_vport_stats_req __user *ustats_req)
 {
-    struct odp_vport_stats_req stats_req;
+    struct xflow_vport_stats_req stats_req;
     struct vport *vport;
     int err;

@@@ -403,45 -395,51 +395,51 @@@
         goto out;
     }

-    if (vport->ops->get_stats) {
-        rcu_read_lock();
-        err = vport->ops->get_stats(vport, &stats_req.stats);
-        rcu_read_unlock();
+    err = vport_get_stats(vport, &stats_req.stats);
-    } else if (vport->ops->flags & VPORT_F_GEN_STATS) {
-        int i;
+ out:
+    vport_unlock();

-        memset(&stats_req.stats, 0, sizeof(struct xflow_vport_stats));
+    if (!err)
-        if (copy_to_user(ustats_req, &stats_req, sizeof(struct odp_vport_stats_req)))
++        if (copy_to_user(ustats_req, &stats_req, sizeof(struct xflow_vport_stats_req)))
+            err = -EFAULT;

-        for_each_possible_cpu(i) {
-            const struct vport_percpu_stats *percpu_stats;
+    return err;
+ }

-            percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
-            stats_req.stats.rx_bytes += percpu_stats->rx_bytes;
-            stats_req.stats.rx_packets += percpu_stats->rx_packets;
-            stats_req.stats.tx_bytes += percpu_stats->tx_bytes;
-            stats_req.stats.tx_packets += percpu_stats->tx_packets;
-        }

+ /**
+  * vport_user_stats_set - sets offset device stats (for userspace callers)
+  *
+  * @ustats_req: Stats set parameters.
+  *
+  * Provides a set of transmit, receive, and error stats to be added as an
+  * offset to the collected data when stats are retrieved.  Some devices may
+  * not support setting the stats, in which case the result will always be
+  * -EOPNOTSUPP.  This function is for userspace callers and assumes no locks
+  * are held.
+  */
+ int
-vport_user_stats_set(struct odp_vport_stats_req __user *ustats_req)
++vport_user_stats_set(struct xflow_vport_stats_req __user *ustats_req)
+ {
-    struct odp_vport_stats_req stats_req;
++    struct xflow_vport_stats_req stats_req;
+    struct vport *vport;
+    int err;

-    spin_lock_bh(&vport->err_stats.lock);
-    if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req)))
++    if (copy_from_user(&stats_req, ustats_req, sizeof(struct xflow_vport_stats_req)))
+        return -EFAULT;

-    stats_req.stats.rx_dropped = vport->err_stats.rx_dropped;
-    stats_req.stats.rx_errors = vport->err_stats.rx_errors
-        + vport->err_stats.rx_frame_err
-        + vport->err_stats.rx_over_err
-        + vport->err_stats.rx_crc_err;
-    stats_req.stats.rx_frame_err = vport->err_stats.rx_frame_err;
-    stats_req.stats.rx_over_err = vport->err_stats.rx_over_err;
-    stats_req.stats.rx_crc_err = vport->err_stats.rx_crc_err;
-    stats_req.stats.tx_dropped = vport->err_stats.tx_dropped;
-    stats_req.stats.tx_errors = vport->err_stats.tx_errors;
-    stats_req.stats.collisions = vport->err_stats.collisions;
+    stats_req.devname[IFNAMSIZ - 1] = '\0';

-    spin_unlock_bh(&vport->err_stats.lock);
+    rtnl_lock();
+    vport_lock();

-        err = 0;
-    } else
-        err = -EOPNOTSUPP;
+    vport = vport_locate(stats_req.devname);
+    if (!vport) {
+        err = -ENODEV;
+        goto out;
+    }
+
+    err = vport_set_stats(vport, &stats_req.stats);

 out:
     vport_unlock();
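For context, these ioctls are issued against the datapath character device. A minimal userspace caller could look like the sketch below (illustrative only; the /dev/net/dp0 path and "eth0" vport name are placeholders, not part of this patch):

    /* Illustrative only, not part of the patch: exercising the new
     * XFLOW_VPORT_STATS_GET ioctl from userspace. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "openvswitch/xflow.h"

    int main(void)
    {
        struct xflow_vport_stats_req req;
        int fd = open("/dev/net/dp0", O_RDONLY);   /* path is a placeholder */

        if (fd < 0) {
            perror("open");
            return 1;
        }
        memset(&req, 0, sizeof req);
        strncpy(req.devname, "eth0", sizeof req.devname - 1);
        if (ioctl(fd, XFLOW_VPORT_STATS_GET, &req) < 0)
            perror("XFLOW_VPORT_STATS_GET");
        else
            printf("rx_packets=%llu tx_packets=%llu\n",
                   (unsigned long long) req.stats.rx_packets,
                   (unsigned long long) req.stats.tx_packets);
        close(fd);
        return 0;
    }
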
@@@ -462,9 -457,9 +457,9 @@@
  * userspace callers and assumes no locks are held.
  */
 int
- vport_ether_get(struct xflow_vport_ether __user *uvport_ether)
-vport_user_ether_get(struct odp_vport_ether __user *uvport_ether)
++vport_user_ether_get(struct xflow_vport_ether __user *uvport_ether)
 {
-    struct odp_vport_ether vport_ether;
+    struct xflow_vport_ether vport_ether;
     struct vport *vport;
     int err = 0;

@@@ -506,9 -501,9 +501,9 @@@ out
  * are held.
  */
 int
- vport_ether_set(struct xflow_vport_ether __user *uvport_ether)
-vport_user_ether_set(struct odp_vport_ether __user *uvport_ether)
++vport_user_ether_set(struct xflow_vport_ether __user *uvport_ether)
 {
-    struct odp_vport_ether vport_ether;
+    struct xflow_vport_ether vport_ether;
     struct vport *vport;
     int err;

@@@ -543,9 -538,9 +538,9 @@@ out
  * callers and assumes no locks are held.
  */
 int
- vport_mtu_get(struct xflow_vport_mtu __user *uvport_mtu)
-vport_user_mtu_get(struct odp_vport_mtu __user *uvport_mtu)
++vport_user_mtu_get(struct xflow_vport_mtu __user *uvport_mtu)
 {
-    struct odp_vport_mtu vport_mtu;
+    struct xflow_vport_mtu vport_mtu;
     struct vport *vport;
     int err = 0;

@@@ -584,9 -579,9 +579,9 @@@ out
  * for userspace callers and assumes no locks are held.
  */
 int
- vport_mtu_set(struct xflow_vport_mtu __user *uvport_mtu)
-vport_user_mtu_set(struct odp_vport_mtu __user *uvport_mtu)
++vport_user_mtu_set(struct xflow_vport_mtu __user *uvport_mtu)
 {
-    struct odp_vport_mtu vport_mtu;
+    struct xflow_vport_mtu vport_mtu;
     struct vport *vport;
     int err;

@@@ -926,6 -921,34 +921,34 @@@ vport_set_addr(struct vport *vport, con
     return -EOPNOTSUPP;
 }

+ /**
+  * vport_set_stats - sets offset device stats (for kernel callers)
+  *
+  * @vport: vport on which to set stats
+  * @stats: stats to set
+  *
+  * Provides a set of transmit, receive, and error stats to be added as an
+  * offset to the collected data when stats are retrieved.  Some devices may
+  * not support setting the stats, in which case the result will always be
+  * -EOPNOTSUPP.  RTNL lock must be held.
+  */
+ int
-vport_set_stats(struct vport *vport, struct odp_vport_stats *stats)
++vport_set_stats(struct vport *vport, struct xflow_vport_stats *stats)
+ {
+    ASSERT_RTNL();
+
+    if (vport->ops->flags & VPORT_F_GEN_STATS) {
+        spin_lock_bh(&vport->stats_lock);
-        memcpy(&vport->offset_stats, stats, sizeof(struct odp_vport_stats));
++        memcpy(&vport->offset_stats, stats, sizeof(struct xflow_vport_stats));
+        spin_unlock_bh(&vport->stats_lock);
+
+        return 0;
+    } else if (vport->ops->set_stats)
+        return vport->ops->set_stats(vport, stats);
+    else
+        return -EOPNOTSUPP;
+ }
+
 /**
  * vport_get_name - retrieve device name
  *
@@@ -1001,6 -1024,92 +1024,92 @@@ vport_get_kobj(const struct vport *vpor
     return NULL;
 }
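Before reading vport_get_stats() below, the accumulation rule it implements can be seen in isolation. A condensed model, illustrative only and reduced to two counters:

    /* Illustrative model, not part of the patch: how vport_get_stats()
     * combines its three sources of counters. */
    #include <stdint.h>

    struct counters {
        uint64_t rx_packets;          /* collected locally, per-CPU */
        uint64_t rx_errors;           /* recorded locally + seen downstream */
    };

    static struct counters
    combined_stats(struct counters offset,    /* from vport_set_stats() */
                   struct counters recorded,  /* err_stats + percpu_stats */
                   struct counters device)    /* from the get_stats() op */
    {
        struct counters total = offset;       /* offsets are the baseline */

        total.rx_errors += recorded.rx_errors + device.rx_errors;
        total.rx_packets += recorded.rx_packets;  /* packet/byte counts come
                                                   * only from local counters */
        return total;
    }
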
+ /**
+  * vport_get_stats - retrieve device stats (for kernel callers)
+  *
+  * @vport: vport from which to retrieve the stats
+  * @stats: location to store stats
+  *
+  * Retrieves transmit, receive, and error stats for the given device.
+  */
+ int
-vport_get_stats(struct vport *vport, struct odp_vport_stats *stats)
++vport_get_stats(struct vport *vport, struct xflow_vport_stats *stats)
+ {
-    struct odp_vport_stats dev_stats;
-    struct odp_vport_stats *dev_statsp = NULL;
++    struct xflow_vport_stats dev_stats;
++    struct xflow_vport_stats *dev_statsp = NULL;
+    int err;
+
+    if (vport->ops->get_stats) {
+        if (vport->ops->flags & VPORT_F_GEN_STATS)
+            dev_statsp = &dev_stats;
+        else
+            dev_statsp = stats;
+
+        rcu_read_lock();
+        err = vport->ops->get_stats(vport, dev_statsp);
+        rcu_read_unlock();
+
+        if (err)
+            goto out;
+    }
+
+    if (vport->ops->flags & VPORT_F_GEN_STATS) {
+        int i;
+
+        /* We potentially have 3 sources of stats that need to be
+         * combined: those we have collected (split into err_stats and
+         * percpu_stats), offset_stats from set_stats(), and device
+         * error stats from get_stats() (for errors that happen
+         * downstream and therefore aren't reported through our
+         * vport_record_error() function). */
+
+        spin_lock_bh(&vport->stats_lock);
+
-        memcpy(stats, &vport->offset_stats, sizeof(struct odp_vport_stats));
++        memcpy(stats, &vport->offset_stats, sizeof(struct xflow_vport_stats));
+
+        stats->rx_errors += vport->err_stats.rx_errors
+            + vport->err_stats.rx_frame_err
+            + vport->err_stats.rx_over_err
+            + vport->err_stats.rx_crc_err;
+        stats->tx_errors += vport->err_stats.tx_errors;
+        stats->tx_dropped += vport->err_stats.tx_dropped;
+        stats->rx_dropped += vport->err_stats.rx_dropped;
+        stats->rx_over_err += vport->err_stats.rx_over_err;
+        stats->rx_crc_err += vport->err_stats.rx_crc_err;
+        stats->rx_frame_err += vport->err_stats.rx_frame_err;
+        stats->collisions += vport->err_stats.collisions;
+
+        spin_unlock_bh(&vport->stats_lock);
+
+        if (dev_statsp) {
+            stats->rx_errors += dev_statsp->rx_errors;
+            stats->tx_errors += dev_statsp->tx_errors;
+            stats->rx_dropped += dev_statsp->rx_dropped;
+            stats->tx_dropped += dev_statsp->tx_dropped;
+            stats->rx_over_err += dev_statsp->rx_over_err;
+            stats->rx_crc_err += dev_statsp->rx_crc_err;
+            stats->rx_frame_err += dev_statsp->rx_frame_err;
+            stats->collisions += dev_statsp->collisions;
+        }
+
+        for_each_possible_cpu(i) {
+            const struct vport_percpu_stats *percpu_stats;
+
+            percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+            stats->rx_bytes += percpu_stats->rx_bytes;
+            stats->rx_packets += percpu_stats->rx_packets;
+            stats->tx_bytes += percpu_stats->tx_bytes;
+            stats->tx_packets += percpu_stats->tx_packets;
+        }
+
+        err = 0;
+    } else
+        err = -EOPNOTSUPP;
+
+ out:
+    return err;
+ }
+
 /**
  * vport_get_flags - retrieve device flags
  *

diff --cc datapath/vport.h
index 5609d2386,7a3d527ae..bcbc4a8a9
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@@ -22,6 -22,22 +22,22 @@@ struct dp_port

 /* The following definitions are for users of the vport subsystem: */

-int vport_user_add(const struct odp_vport_add __user *);
-int vport_user_mod(const struct odp_vport_mod __user *);
++int vport_user_add(const struct xflow_vport_add __user *);
++int vport_user_mod(const struct xflow_vport_mod __user *);
+ int vport_user_del(const char __user *udevname);
+
+ #ifdef CONFIG_COMPAT
-int compat_vport_user_add(struct compat_odp_vport_add __user *);
-int compat_vport_user_mod(struct compat_odp_vport_mod __user *);
++int compat_vport_user_add(struct compat_xflow_vport_add __user *);
++int compat_vport_user_mod(struct compat_xflow_vport_mod __user *);
+ #endif
+
-int vport_user_stats_get(struct odp_vport_stats_req __user *);
-int vport_user_stats_set(struct odp_vport_stats_req __user *);
-int
vport_user_ether_get(struct odp_vport_ether __user *); -int vport_user_ether_set(struct odp_vport_ether __user *); -int vport_user_mtu_get(struct odp_vport_mtu __user *); -int vport_user_mtu_set(struct odp_vport_mtu __user *); ++int vport_user_stats_get(struct xflow_vport_stats_req __user *); ++int vport_user_stats_set(struct xflow_vport_stats_req __user *); ++int vport_user_ether_get(struct xflow_vport_ether __user *); ++int vport_user_ether_set(struct xflow_vport_ether __user *); ++int vport_user_mtu_get(struct xflow_vport_mtu __user *); ++int vport_user_mtu_set(struct xflow_vport_mtu __user *); + void vport_lock(void); void vport_unlock(void); @@@ -54,14 -55,15 +55,15 @@@ int vport_detach(struct vport *) int vport_set_mtu(struct vport *, int mtu); int vport_set_addr(struct vport *, const unsigned char *); -int vport_set_stats(struct vport *, struct odp_vport_stats *); ++int vport_set_stats(struct vport *, struct xflow_vport_stats *); const char *vport_get_name(const struct vport *); const char *vport_get_type(const struct vport *); const unsigned char *vport_get_addr(const struct vport *); struct dp_port *vport_get_dp_port(const struct vport *); - struct kobject *vport_get_kobj(const struct vport *); -int vport_get_stats(struct vport *, struct odp_vport_stats *); ++int vport_get_stats(struct vport *, struct xflow_vport_stats *); unsigned vport_get_flags(const struct vport *); int vport_is_running(const struct vport *); @@@ -102,7 -102,10 +102,10 @@@ struct vport struct dp_port *dp_port; struct vport_percpu_stats *percpu_stats; + + spinlock_t stats_lock; struct vport_err_stats err_stats; - struct odp_vport_stats offset_stats; ++ struct xflow_vport_stats offset_stats; }; #define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */ @@@ -168,6 -174,7 +174,7 @@@ struct vport_ops int (*set_mtu)(struct vport *, int mtu); int (*set_addr)(struct vport *, const unsigned char *); - int (*set_stats)(const struct vport *, struct odp_vport_stats *); ++ int (*set_stats)(const struct vport *, struct xflow_vport_stats *); /* Called with rcu_read_lock or RTNL lock. */ const char *(*get_name)(const struct vport *); diff --cc include/openvswitch/automake.mk index 2bdf0444d,92e071884..61859979f --- a/include/openvswitch/automake.mk +++ b/include/openvswitch/automake.mk @@@ -1,6 -1,5 +1,5 @@@ noinst_HEADERS += \ include/openvswitch/gre.h \ include/openvswitch/brcompat-netlink.h \ - include/openvswitch/internal_dev.h \ - include/openvswitch/datapath-protocol.h + include/openvswitch/xflow.h diff --cc include/openvswitch/xflow.h index ee97b43a3,000000000..7367bbfb3 mode 100644,000000..100644 --- a/include/openvswitch/xflow.h +++ b/include/openvswitch/xflow.h @@@ -1,426 -1,0 +1,437 @@@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * + * This file is offered under your choice of two licenses: Apache 2.0 or GNU + * GPL 2.0 or later. The permission statements for each of these licenses is + * given below. You may license your modifications to this file under either + * of these licenses or both. If you wish to license your modifications under + * only one of these licenses, delete the permission text for the other + * license. + * + * ---------------------------------------------------------------------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * ----------------------------------------------------------------------
+ */
+
+/* Protocol between userspace and kernel datapath.
+ *
+ * Be sure to update datapath/xflow-compat.h if you change any of the
+ * structures in here. */
+
+#ifndef XFLOW_H
+#define XFLOW_H 1
+
+/* The ovs_be<N> types indicate that an object is in big-endian, not
+ * native-endian, byte order.  They are otherwise equivalent to uint<N>_t.
+ * The Linux kernel already has __be<N> types for this, which take on
+ * additional semantics when the "sparse" static checker is used, so we use
+ * those types when compiling the kernel. */
+#ifdef __KERNEL__
+#include <linux/types.h>
+#define ovs_be16 __be16
+#define ovs_be32 __be32
+#define ovs_be64 __be64
+#else
+#include <stdint.h>
+#define ovs_be16 uint16_t
+#define ovs_be32 uint32_t
+#define ovs_be64 uint64_t
+#endif
+
+#define XFLOW_MAX 256             /* Maximum number of datapaths.
*/ + +#define XFLOW_DP_CREATE _IO('O', 0) +#define XFLOW_DP_DESTROY _IO('O', 1) +#define XFLOW_DP_STATS _IOW('O', 2, struct xflow_stats) + +#define XFLOW_GET_DROP_FRAGS _IOW('O', 3, int) +#define XFLOW_SET_DROP_FRAGS _IOR('O', 4, int) + +#define XFLOW_GET_LISTEN_MASK _IOW('O', 5, int) +#define XFLOW_SET_LISTEN_MASK _IOR('O', 6, int) + +#define XFLOW_PORT_ATTACH _IOR('O', 7, struct xflow_port) +#define XFLOW_PORT_DETACH _IOR('O', 8, int) +#define XFLOW_PORT_QUERY _IOWR('O', 9, struct xflow_port) +#define XFLOW_PORT_LIST _IOWR('O', 10, struct xflow_portvec) + +#define XFLOW_PORT_GROUP_SET _IOR('O', 11, struct xflow_port_group) +#define XFLOW_PORT_GROUP_GET _IOWR('O', 12, struct xflow_port_group) + +#define XFLOW_FLOW_GET _IOWR('O', 13, struct xflow_flow) +#define XFLOW_FLOW_PUT _IOWR('O', 14, struct xflow_flow) +#define XFLOW_FLOW_LIST _IOWR('O', 15, struct xflow_flowvec) +#define XFLOW_FLOW_FLUSH _IO('O', 16) +#define XFLOW_FLOW_DEL _IOWR('O', 17, struct xflow_flow) + +#define XFLOW_EXECUTE _IOR('O', 18, struct xflow_execute) + +#define XFLOW_SET_SFLOW_PROBABILITY _IOR('O', 19, int) +#define XFLOW_GET_SFLOW_PROBABILITY _IOW('O', 20, int) + +#define XFLOW_VPORT_ADD _IOR('O', 21, struct xflow_vport_add) +#define XFLOW_VPORT_MOD _IOR('O', 22, struct xflow_vport_mod) +#define XFLOW_VPORT_DEL _IO('O', 23) +#define XFLOW_VPORT_STATS_GET _IOWR('O', 24, struct xflow_vport_stats_req) +#define XFLOW_VPORT_ETHER_GET _IOWR('O', 25, struct xflow_vport_ether) +#define XFLOW_VPORT_ETHER_SET _IOW('O', 26, struct xflow_vport_ether) +#define XFLOW_VPORT_MTU_GET _IOWR('O', 27, struct xflow_vport_mtu) +#define XFLOW_VPORT_MTU_SET _IOW('O', 28, struct xflow_vport_mtu) ++#define XFLOW_VPORT_STATS_SET _IOWR('O', 29, struct xflow_vport_stats_req) + +struct xflow_stats { + /* Flows. */ + uint32_t n_flows; /* Number of flows in flow table. */ + uint32_t cur_capacity; /* Current flow table capacity. */ + uint32_t max_capacity; /* Maximum expansion of flow table capacity. */ + + /* Ports. */ + uint32_t n_ports; /* Current number of ports. */ + uint32_t max_ports; /* Maximum supported number of ports. */ + uint16_t max_groups; /* Maximum number of port groups. */ + uint16_t reserved; + + /* Lookups. */ + uint64_t n_frags; /* Number of dropped IP fragments. */ + uint64_t n_hit; /* Number of flow table matches. */ + uint64_t n_missed; /* Number of flow table misses. */ + uint64_t n_lost; /* Number of misses not sent to userspace. */ + + /* Queues. */ + uint16_t max_miss_queue; /* Max length of XFLOWL_MISS queue. */ + uint16_t max_action_queue; /* Max length of XFLOWL_ACTION queue. */ + uint16_t max_sflow_queue; /* Max length of XFLOWL_SFLOW queue. */ +}; + +/* Logical ports. */ +#define XFLOWP_LOCAL ((uint16_t)0) +#define XFLOWP_NONE ((uint16_t)-1) +#define XFLOWP_NORMAL ((uint16_t)-2) + +/* Listening channels. */ +#define _XFLOWL_MISS_NR 0 /* Packet missed in flow table. */ +#define XFLOWL_MISS (1 << _XFLOWL_MISS_NR) +#define _XFLOWL_ACTION_NR 1 /* Packet output to XFLOWP_CONTROLLER. */ +#define XFLOWL_ACTION (1 << _XFLOWL_ACTION_NR) +#define _XFLOWL_SFLOW_NR 2 /* sFlow samples. */ +#define XFLOWL_SFLOW (1 << _XFLOWL_SFLOW_NR) +#define XFLOWL_ALL (XFLOWL_MISS | XFLOWL_ACTION | XFLOWL_SFLOW) + +/** + * struct xflow_msg - format of messages read from datapath fd. + * @type: One of the %_XFLOWL_* constants. + * @length: Total length of message, including this header. + * @port: Port that received the packet embedded in this message. + * @reserved: Not currently used. Should be set to 0. 
+ * @arg: Argument value whose meaning depends on @type.
+ *
+ * For @type == %_XFLOWL_MISS_NR, the header is followed by packet data.  The
+ * @arg member is the ID (in network byte order) of the tunnel that
+ * encapsulated this packet.  It is 0 if the packet was not received on a
+ * tunnel.
+ *
+ * For @type == %_XFLOWL_ACTION_NR, the header is followed by packet data.  The
+ * @arg member is copied from the &struct xflow_action_controller that caused
+ * the &struct xflow_msg to be composed.
+ *
+ * For @type == %_XFLOWL_SFLOW_NR, the header is followed by &struct
+ * xflow_sflow_sample_header, then by an array of &union xflow_action (the
+ * number of which is specified in &struct xflow_sflow_sample_header), then by
+ * packet data.
+ */
+struct xflow_msg {
+    uint32_t type;
+    uint32_t length;
+    uint16_t port;
+    uint16_t reserved;
+    uint32_t arg;
+};
+
+/**
+ * struct xflow_sflow_sample_header - header added to sFlow sampled packet.
+ * @sample_pool: Number of packets that were candidates for sFlow sampling,
+ * regardless of whether they were actually chosen and sent down to userspace.
+ * @n_actions: Number of "union xflow_action"s immediately following this
+ * header.
+ *
+ * This header follows &struct xflow_msg when that structure's @type is
+ * %_XFLOWL_SFLOW_NR, and it is itself followed by an array of &union
+ * xflow_action (the number of which is specified in @n_actions) and then by
+ * packet data.
+ */
+struct xflow_sflow_sample_header {
+    uint32_t sample_pool;
+    uint32_t n_actions;
+};
+
+#define XFLOW_PORT_INTERNAL (1 << 0) /* This port is simulated. */
+struct xflow_port {
+    char devname[16];            /* IFNAMSIZ */
+    uint16_t port;
+    uint16_t flags;
+    uint32_t reserved2;
+};
+
+struct xflow_portvec {
+    struct xflow_port *ports;
+    uint32_t n_ports;
+};
+
+struct xflow_port_group {
+    uint16_t *ports;
+    uint16_t n_ports;            /* Number of ports. */
+    uint16_t group;              /* Group number. */
+};
+
+struct xflow_flow_stats {
+    uint64_t n_packets;          /* Number of matched packets. */
+    uint64_t n_bytes;            /* Number of matched bytes. */
-    uint64_t used_sec;           /* Time last used. */
++    uint64_t used_sec;           /* Time last used, in system monotonic time. */
+    uint32_t used_nsec;
+    uint8_t tcp_flags;
+    uint8_t ip_tos;
+    uint16_t error;              /* Used by XFLOW_FLOW_GET. */
+};
+
+/*
+ * The datapath protocol adopts the Linux convention for TCI fields: if an
+ * 802.1Q header is present then its TCI value is used verbatim except that the
+ * CFI bit (0x1000) is always set to 1, and all-bits-zero indicates no 802.1Q
+ * header.
+ */
+#define XFLOW_TCI_PRESENT 0x1000  /* CFI bit */
+
+struct xflow_key {
+    ovs_be32 tun_id;             /* Encapsulating tunnel ID. */
+    ovs_be32 nw_src;             /* IP source address. */
+    ovs_be32 nw_dst;             /* IP destination address. */
+    uint16_t in_port;            /* Input switch port. */
+    ovs_be16 dl_tci;             /* All zeros if 802.1Q header absent,
+                                  * XFLOW_TCI_PRESENT set if present. */
+    ovs_be16 dl_type;            /* Ethernet frame type. */
+    ovs_be16 tp_src;             /* TCP/UDP source port. */
+    ovs_be16 tp_dst;             /* TCP/UDP destination port. */
+    uint8_t dl_src[6];           /* Ethernet source address. */
+    uint8_t dl_dst[6];           /* Ethernet destination address. */
+    uint8_t nw_proto;            /* IP protocol or low 8 bits of ARP opcode. */
+    uint8_t nw_tos;              /* IP ToS (DSCP field, 6 bits). */
+};
+
+/* Flags for XFLOW_FLOW. */
+#define XFLOWFF_ZERO_TCP_FLAGS (1 << 0)  /* Zero the TCP flags. */
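To make the TCI convention above concrete, a small helper (illustrative only, assuming this header is included) that encodes "VLAN 42, priority 3" for xflow_key.dl_tci:

    /* Illustrative only, not part of the header: TCI carried verbatim,
     * CFI bit forced on, all-zeros meaning "no 802.1Q header". */
    #include <arpa/inet.h>
    #include "openvswitch/xflow.h"

    static ovs_be16 make_dl_tci(uint16_t vid, uint8_t pcp)
    {
        return htons((pcp << 13) | XFLOW_TCI_PRESENT | (vid & 0xfff));
    }
    /* make_dl_tci(42, 3) == htons(0x6000 | 0x1000 | 0x002a) == htons(0x702a) */
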
+
+struct xflow_flow {
+    struct xflow_flow_stats stats;
+    struct xflow_key key;
+    union xflow_action *actions;
+    uint32_t n_actions;
+    uint32_t flags;
+};
+
+/* Flags for XFLOW_FLOW_PUT. */
+#define XFLOWPF_CREATE (1 << 0)      /* Allow creating a new flow. */
+#define XFLOWPF_MODIFY (1 << 1)      /* Allow modifying an existing flow. */
+#define XFLOWPF_ZERO_STATS (1 << 2)  /* Zero the stats of existing flow. */
+
+/* XFLOW_FLOW_PUT argument. */
+struct xflow_flow_put {
+    struct xflow_flow flow;
+    uint32_t flags;
+};
+
+struct xflow_flowvec {
+    struct xflow_flow *flows;
+    uint32_t n_flows;
+};
+
+/* Action types. */
+#define XFLOWAT_OUTPUT 0             /* Output to switch port. */
+#define XFLOWAT_OUTPUT_GROUP 1       /* Output to all ports in group. */
+#define XFLOWAT_CONTROLLER 2         /* Send copy to controller. */
+#define XFLOWAT_SET_DL_TCI 3         /* Set the 802.1q VLAN VID and/or PCP. */
+#define XFLOWAT_STRIP_VLAN 4         /* Strip the 802.1q header. */
+#define XFLOWAT_SET_DL_SRC 5         /* Ethernet source address. */
+#define XFLOWAT_SET_DL_DST 6         /* Ethernet destination address. */
+#define XFLOWAT_SET_NW_SRC 7         /* IP source address. */
+#define XFLOWAT_SET_NW_DST 8         /* IP destination address. */
+#define XFLOWAT_SET_NW_TOS 9         /* IP ToS/DSCP field (6 bits). */
+#define XFLOWAT_SET_TP_SRC 10        /* TCP/UDP source port. */
+#define XFLOWAT_SET_TP_DST 11        /* TCP/UDP destination port. */
+#define XFLOWAT_SET_TUNNEL 12        /* Set the encapsulating tunnel ID. */
- #define XFLOWAT_N_ACTIONS 13
++#define XFLOWAT_SET_PRIORITY 14     /* Set skb->priority. */
++#define XFLOWAT_POP_PRIORITY 15     /* Restore original skb->priority. */
++#define XFLOWAT_N_ACTIONS 16
+
+struct xflow_action_output {
+    uint16_t type;               /* XFLOWAT_OUTPUT. */
+    uint16_t port;               /* Output port. */
+    uint16_t reserved1;
+    uint16_t reserved2;
+};
+
+struct xflow_action_output_group {
+    uint16_t type;               /* XFLOWAT_OUTPUT_GROUP. */
+    uint16_t group;              /* Group number. */
+    uint16_t reserved1;
+    uint16_t reserved2;
+};
+
+struct xflow_action_controller {
+    uint16_t type;               /* XFLOWAT_CONTROLLER. */
+    uint16_t reserved;
+    uint32_t arg;                /* Copied to struct xflow_msg 'arg' member. */
+};
+
+struct xflow_action_tunnel {
+    uint16_t type;               /* XFLOWAT_SET_TUNNEL. */
+    uint16_t reserved;
+    ovs_be32 tun_id;             /* Tunnel ID. */
+};
+
+/* Action structure for XFLOWAT_SET_DL_TCI. */
+struct xflow_action_dl_tci {
+    uint16_t type;               /* XFLOWAT_SET_DL_TCI. */
+    ovs_be16 tci;                /* New TCI.  Bits not in mask must be zero. */
+    ovs_be16 mask;               /* 0x0fff to set VID, 0xe000 to set PCP,
+                                  * or 0xefff to set both. */
+    uint16_t reserved;
+};
+
+/* Action structure for XFLOWAT_SET_DL_SRC/DST. */
+struct xflow_action_dl_addr {
+    uint16_t type;               /* XFLOWAT_SET_DL_SRC/DST. */
+    uint8_t dl_addr[6];          /* Ethernet address. */
+};
+
+/* Action structure for XFLOWAT_SET_NW_SRC/DST. */
+struct xflow_action_nw_addr {
+    uint16_t type;               /* XFLOWAT_SET_NW_SRC/DST. */
+    uint16_t reserved;
+    ovs_be32 nw_addr;            /* IP address. */
+};
+
+struct xflow_action_nw_tos {
+    uint16_t type;               /* XFLOWAT_SET_NW_TOS. */
+    uint8_t nw_tos;              /* IP ToS/DSCP field (6 bits). */
+    uint8_t reserved1;
+    uint16_t reserved2;
+    uint16_t reserved3;
+};
+
++/* Action structure for XFLOWAT_SET_PRIORITY. */
++struct xflow_action_priority {
++    uint16_t type;               /* XFLOWAT_SET_PRIORITY. */
++    uint16_t reserved;
++    uint32_t priority;           /* skb->priority value. */
++};
++
+/* Action structure for XFLOWAT_SET_TP_SRC/DST. */
+struct xflow_action_tp_port {
+    uint16_t type;               /* XFLOWAT_SET_TP_SRC/DST. */
+    ovs_be16 tp_port;            /* TCP/UDP port. */
+    uint16_t reserved1;
+    uint16_t reserved2;
+};
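A short sketch of filling in the new priority action (illustrative only; XFLOWAT_POP_PRIORITY takes no argument and restores the priority the datapath saved before the first XFLOWAT_SET_PRIORITY in the list):

    /* Illustrative only, not part of the header. */
    #include "openvswitch/xflow.h"

    static void set_priority_action(struct xflow_action_priority *a,
                                    uint32_t priority)
    {
        a->type = XFLOWAT_SET_PRIORITY;
        a->reserved = 0;
        a->priority = priority;   /* copied to skb->priority in the kernel */
    }
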
+
+union xflow_action {
+    uint16_t type;
+    struct xflow_action_output output;
+    struct xflow_action_output_group output_group;
+    struct xflow_action_controller controller;
+    struct xflow_action_tunnel tunnel;
+    struct xflow_action_dl_tci dl_tci;
+    struct xflow_action_dl_addr dl_addr;
+    struct xflow_action_nw_addr nw_addr;
+    struct xflow_action_nw_tos nw_tos;
+    struct xflow_action_tp_port tp_port;
++    struct xflow_action_priority priority;
+};
+
+struct xflow_execute {
+    uint16_t in_port;
+    uint16_t reserved1;
+    uint32_t reserved2;
+
+    union xflow_action *actions;
+    uint32_t n_actions;
+
+    const void *data;
+    uint32_t length;
+};
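Putting the action and execute structures together, a userspace sketch of injecting a raw frame with XFLOW_EXECUTE (illustrative only; the datapath fd and the choice of XFLOWP_LOCAL as in_port are placeholders):

    /* Illustrative only, not part of the header. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include "openvswitch/xflow.h"

    static int send_with_priority(int fd, const void *frame, uint32_t len,
                                  uint16_t out_port, uint32_t priority)
    {
        union xflow_action actions[2];
        struct xflow_execute execute;

        memset(actions, 0, sizeof actions);
        actions[0].priority.type = XFLOWAT_SET_PRIORITY;
        actions[0].priority.priority = priority;
        actions[1].output.type = XFLOWAT_OUTPUT;
        actions[1].output.port = out_port;

        memset(&execute, 0, sizeof execute);
        execute.in_port = XFLOWP_LOCAL;   /* arbitrary choice here */
        execute.actions = actions;
        execute.n_actions = 2;
        execute.data = frame;
        execute.length = len;
        return ioctl(fd, XFLOW_EXECUTE, &execute);
    }
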
+#define VPORT_TYPE_SIZE 16
+struct xflow_vport_add {
+    char port_type[VPORT_TYPE_SIZE];
+    char devname[16];            /* IFNAMSIZ */
+    void *config;
+};
+
+struct xflow_vport_mod {
+    char devname[16];            /* IFNAMSIZ */
+    void *config;
+};
+
+struct xflow_vport_stats {
+    uint64_t rx_packets;
+    uint64_t tx_packets;
+    uint64_t rx_bytes;
+    uint64_t tx_bytes;
+    uint64_t rx_dropped;
+    uint64_t tx_dropped;
+    uint64_t rx_errors;
+    uint64_t tx_errors;
+    uint64_t rx_frame_err;
+    uint64_t rx_over_err;
+    uint64_t rx_crc_err;
+    uint64_t collisions;
+};
+
+struct xflow_vport_stats_req {
+    char devname[16];            /* IFNAMSIZ */
+    struct xflow_vport_stats stats;
+};
+
+struct xflow_vport_ether {
+    char devname[16];            /* IFNAMSIZ */
+    unsigned char ether_addr[6];
+};
+
+struct xflow_vport_mtu {
+    char devname[16];            /* IFNAMSIZ */
+    uint16_t mtu;
+};
+
+/* Values below this cutoff are 802.3 packets and the two bytes
+ * following MAC addresses are used as a frame length.  Otherwise, the
+ * two bytes are used as the Ethernet type.
+ */
+#define XFLOW_DL_TYPE_ETH2_CUTOFF 0x0600
+
+/* Value of dl_type to indicate that the frame does not include an
+ * Ethernet type.
+ */
+#define XFLOW_DL_TYPE_NOT_ETH_TYPE 0x05ff
+
+#endif /* openvswitch/xflow.h */

diff --cc lib/automake.mk
index 17fa4bccc,71e4d61fb..80c92dca8
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@@ -61,8 -65,12 +61,10 @@@ lib_libopenvswitch_a_SOURCES =
     lib/netdev-provider.h \
     lib/netdev.c \
     lib/netdev.h \
-    lib/odp-util.c \
-    lib/odp-util.h \
     lib/ofp-print.c \
     lib/ofp-print.h \
+    lib/ofp-util.c \
+    lib/ofp-util.h \
     lib/ofpbuf.c \
     lib/ofpbuf.h \
     lib/ovsdb-data.c \

diff --cc lib/netdev-vport.c
index 911e5601b,58858f90f..28730b5c8
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@@ -158,6 -158,40 +158,40 @@@ netdev_vport_get_stats(const struct net
     return 0;
 }

+ int
+ netdev_vport_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
+ {
-    struct odp_vport_stats_req ovsr;
++    struct xflow_vport_stats_req ovsr;
+    int err;
+
+    ovs_strlcpy(ovsr.devname, netdev_get_name(netdev), sizeof ovsr.devname);
+
+    ovsr.stats.rx_packets = stats->rx_packets;
+    ovsr.stats.tx_packets = stats->tx_packets;
+    ovsr.stats.rx_bytes = stats->rx_bytes;
+    ovsr.stats.tx_bytes = stats->tx_bytes;
+    ovsr.stats.rx_errors = stats->rx_errors;
+    ovsr.stats.tx_errors = stats->tx_errors;
+    ovsr.stats.rx_dropped = stats->rx_dropped;
+    ovsr.stats.tx_dropped = stats->tx_dropped;
+    ovsr.stats.collisions = stats->collisions;
+    ovsr.stats.rx_over_err = stats->rx_over_errors;
+    ovsr.stats.rx_crc_err = stats->rx_crc_errors;
+    ovsr.stats.rx_frame_err = stats->rx_frame_errors;
+
-    err = netdev_vport_do_ioctl(ODP_VPORT_STATS_SET, &ovsr);
++    err = netdev_vport_do_ioctl(XFLOW_VPORT_STATS_SET, &ovsr);
+
+    /* If the vport layer doesn't know about the device, that doesn't mean it
+     * doesn't exist (after all, we were able to open it when netdev_open()
+     * was called); it just means that it isn't attached and we'll be getting
+     * stats a different way. */
+    if (err == ENODEV) {
+        err = EOPNOTSUPP;
+    }
+
+    return err;
+ }
+
 int
 netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
                           enum netdev_flags off,
                           enum netdev_flags on OVS_UNUSED,

diff --cc lib/netdev.h
index e8fa1bb29,cd5c8c300..dbac65d6c
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@@ -97,9 -97,9 +97,10 @@@ int netdev_register_provider(const stru
 int netdev_unregister_provider(const char *type);
 void netdev_enumerate_types(struct svec *types);

+ /* Open and close. */
 int netdev_open(struct netdev_options *, struct netdev **);
 int netdev_open_default(const char *name, struct netdev **);
+struct netdev *netdev_reopen(struct netdev *);
 int netdev_reconfigure(struct netdev *, const struct shash *args);
 void netdev_close(struct netdev *);
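One intended use of the netdev-vport stats hooks above is seeding offsets at attach time so readings stay continuous, mirroring what netdev_create() does in the kernel when USE_VPORT_STATS is set. A sketch (illustrative; signatures as used in this patch):

    /* Illustrative only, not part of the patch. */
    static int seed_vport_stats(struct netdev *netdev)
    {
        struct netdev_stats stats;
        int error = netdev_vport_get_stats(netdev, &stats);

        return error ? error : netdev_vport_set_stats(netdev, &stats);
    }
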
diff --cc lib/ofp-util.c
index 000000000,e990f0f44..1fc303030
mode 000000,100644..100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@@ -1,0 -1,751 +1,751 @@@
+ /*
+  * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+  *
+  * Licensed under the Apache License, Version 2.0 (the "License");
+  * you may not use this file except in compliance with the License.
+  * You may obtain a copy of the License at:
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #include <config.h>
+ #include "ofp-print.h"
+ #include <assert.h>
+ #include <stdlib.h>
+ #include "ofp-util.h"
+ #include "ofpbuf.h"
+ #include "packets.h"
+ #include "random.h"
+ #include "xtoxll.h"
+
+ #define THIS_MODULE VLM_ofp_util
+ #include "vlog.h"
+
+ /* Rate limit for OpenFlow message parse errors.  These always indicate a bug
+  * in the peer and so there's not much point in showing a lot of them. */
+ static struct vlog_rate_limit bad_ofmsg_rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+ /* XXX we should really use consecutive xids to avoid probabilistic
+  * failures. */
+ static inline uint32_t
+ alloc_xid(void)
+ {
+     return random_uint32();
+ }
+
+ /* Allocates and stores in '*bufferp' a new ofpbuf with a size of
+  * 'openflow_len', starting with an OpenFlow header with the given 'type' and
+  * an arbitrary transaction id.  Allocated bytes beyond the header, if any, are
+  * zeroed.
+  *
+  * The caller is responsible for freeing '*bufferp' when it is no longer
+  * needed.
+  *
+  * The OpenFlow header length is initially set to 'openflow_len'; if the
+  * message is later extended, the length should be updated with
+  * update_openflow_length() before sending.
+  *
+  * Returns the header. */
+ void *
+ make_openflow(size_t openflow_len, uint8_t type, struct ofpbuf **bufferp)
+ {
+     *bufferp = ofpbuf_new(openflow_len);
+     return put_openflow_xid(openflow_len, type, alloc_xid(), *bufferp);
+ }
+
+ /* Allocates and stores in '*bufferp' a new ofpbuf with a size of
+  * 'openflow_len', starting with an OpenFlow header with the given 'type' and
+  * transaction id 'xid'.  Allocated bytes beyond the header, if any, are
+  * zeroed.
+  *
+  * The caller is responsible for freeing '*bufferp' when it is no longer
+  * needed.
+  *
+  * The OpenFlow header length is initially set to 'openflow_len'; if the
+  * message is later extended, the length should be updated with
+  * update_openflow_length() before sending.
+  *
+  * Returns the header. */
+ void *
+ make_openflow_xid(size_t openflow_len, uint8_t type, uint32_t xid,
+                   struct ofpbuf **bufferp)
+ {
+     *bufferp = ofpbuf_new(openflow_len);
+     return put_openflow_xid(openflow_len, type, xid, *bufferp);
+ }
+
+ /* Appends 'openflow_len' bytes to 'buffer', starting with an OpenFlow header
+  * with the given 'type' and an arbitrary transaction id.  Allocated bytes
+  * beyond the header, if any, are zeroed.
+  *
+  * The OpenFlow header length is initially set to 'openflow_len'; if the
+  * message is later extended, the length should be updated with
+  * update_openflow_length() before sending.
+  *
+  * Returns the header. */
+ void *
+ put_openflow(size_t openflow_len, uint8_t type, struct ofpbuf *buffer)
+ {
+     return put_openflow_xid(openflow_len, type, alloc_xid(), buffer);
+ }
+
+ /* Appends 'openflow_len' bytes to 'buffer', starting with an OpenFlow header
+  * with the given 'type' and a transaction id 'xid'.  Allocated bytes beyond
+  * the header, if any, are zeroed.
+  *
+  * The OpenFlow header length is initially set to 'openflow_len'; if the
+  * message is later extended, the length should be updated with
+  * update_openflow_length() before sending.
+  *
+  * Returns the header.
*/ + void * + put_openflow_xid(size_t openflow_len, uint8_t type, uint32_t xid, + struct ofpbuf *buffer) + { + struct ofp_header *oh; + + assert(openflow_len >= sizeof *oh); + assert(openflow_len <= UINT16_MAX); + + oh = ofpbuf_put_uninit(buffer, openflow_len); + oh->version = OFP_VERSION; + oh->type = type; + oh->length = htons(openflow_len); + oh->xid = xid; + memset(oh + 1, 0, openflow_len - sizeof *oh); + return oh; + } + + /* Updates the 'length' field of the OpenFlow message in 'buffer' to + * 'buffer->size'. */ + void + update_openflow_length(struct ofpbuf *buffer) + { + struct ofp_header *oh = ofpbuf_at_assert(buffer, 0, sizeof *oh); + oh->length = htons(buffer->size); + } + + struct ofpbuf * + make_flow_mod(uint16_t command, const flow_t *flow, size_t actions_len) + { + struct ofp_flow_mod *ofm; + size_t size = sizeof *ofm + actions_len; + struct ofpbuf *out = ofpbuf_new(size); + ofm = ofpbuf_put_zeros(out, sizeof *ofm); + ofm->header.version = OFP_VERSION; + ofm->header.type = OFPT_FLOW_MOD; + ofm->header.length = htons(size); + ofm->cookie = 0; + ofm->match.wildcards = htonl(0); - ofm->match.in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL ++ ofm->match.in_port = htons(flow->in_port == XFLOWP_LOCAL ? OFPP_LOCAL + : flow->in_port); + memcpy(ofm->match.dl_src, flow->dl_src, sizeof ofm->match.dl_src); + memcpy(ofm->match.dl_dst, flow->dl_dst, sizeof ofm->match.dl_dst); + ofm->match.dl_vlan = flow->dl_vlan; + ofm->match.dl_vlan_pcp = flow->dl_vlan_pcp; + ofm->match.dl_type = flow->dl_type; + ofm->match.nw_src = flow->nw_src; + ofm->match.nw_dst = flow->nw_dst; + ofm->match.nw_proto = flow->nw_proto; + ofm->match.nw_tos = flow->nw_tos; + ofm->match.tp_src = flow->tp_src; + ofm->match.tp_dst = flow->tp_dst; + ofm->command = htons(command); + return out; + } + + struct ofpbuf * + make_add_flow(const flow_t *flow, uint32_t buffer_id, + uint16_t idle_timeout, size_t actions_len) + { + struct ofpbuf *out = make_flow_mod(OFPFC_ADD, flow, actions_len); + struct ofp_flow_mod *ofm = out->data; + ofm->idle_timeout = htons(idle_timeout); + ofm->hard_timeout = htons(OFP_FLOW_PERMANENT); + ofm->buffer_id = htonl(buffer_id); + return out; + } + + struct ofpbuf * + make_del_flow(const flow_t *flow) + { + struct ofpbuf *out = make_flow_mod(OFPFC_DELETE_STRICT, flow, 0); + struct ofp_flow_mod *ofm = out->data; + ofm->out_port = htons(OFPP_NONE); + return out; + } + + struct ofpbuf * + make_add_simple_flow(const flow_t *flow, + uint32_t buffer_id, uint16_t out_port, + uint16_t idle_timeout) + { + struct ofp_action_output *oao; + struct ofpbuf *buffer = make_add_flow(flow, buffer_id, idle_timeout, + sizeof *oao); + oao = ofpbuf_put_zeros(buffer, sizeof *oao); + oao->type = htons(OFPAT_OUTPUT); + oao->len = htons(sizeof *oao); + oao->port = htons(out_port); + return buffer; + } + + struct ofpbuf * + make_packet_in(uint32_t buffer_id, uint16_t in_port, uint8_t reason, + const struct ofpbuf *payload, int max_send_len) + { + struct ofp_packet_in *opi; + struct ofpbuf *buf; + int send_len; + + send_len = MIN(max_send_len, payload->size); + buf = ofpbuf_new(sizeof *opi + send_len); + opi = put_openflow_xid(offsetof(struct ofp_packet_in, data), + OFPT_PACKET_IN, 0, buf); + opi->buffer_id = htonl(buffer_id); + opi->total_len = htons(payload->size); + opi->in_port = htons(in_port); + opi->reason = reason; + ofpbuf_put(buf, payload->data, send_len); + update_openflow_length(buf); + + return buf; + } + + struct ofpbuf * + make_packet_out(const struct ofpbuf *packet, uint32_t buffer_id, + uint16_t in_port, 
+                 const struct ofp_action_header *actions, size_t n_actions)
+ {
+     size_t actions_len = n_actions * sizeof *actions;
+     struct ofp_packet_out *opo;
+     size_t size = sizeof *opo + actions_len + (packet ? packet->size : 0);
+     struct ofpbuf *out = ofpbuf_new(size);
+
+     opo = ofpbuf_put_uninit(out, sizeof *opo);
+     opo->header.version = OFP_VERSION;
+     opo->header.type = OFPT_PACKET_OUT;
+     opo->header.length = htons(size);
+     opo->header.xid = htonl(0);
+     opo->buffer_id = htonl(buffer_id);
-     opo->in_port = htons(in_port == ODPP_LOCAL ? OFPP_LOCAL : in_port);
++    opo->in_port = htons(in_port == XFLOWP_LOCAL ? OFPP_LOCAL : in_port);
+     opo->actions_len = htons(actions_len);
+     ofpbuf_put(out, actions, actions_len);
+     if (packet) {
+         ofpbuf_put(out, packet->data, packet->size);
+     }
+     return out;
+ }
+
+ struct ofpbuf *
+ make_unbuffered_packet_out(const struct ofpbuf *packet,
+                            uint16_t in_port, uint16_t out_port)
+ {
+     struct ofp_action_output action;
+     action.type = htons(OFPAT_OUTPUT);
+     action.len = htons(sizeof action);
+     action.port = htons(out_port);
+     return make_packet_out(packet, UINT32_MAX, in_port,
+                            (struct ofp_action_header *) &action, 1);
+ }
+
+ struct ofpbuf *
+ make_buffered_packet_out(uint32_t buffer_id,
+                          uint16_t in_port, uint16_t out_port)
+ {
+     struct ofp_action_output action;
+     action.type = htons(OFPAT_OUTPUT);
+     action.len = htons(sizeof action);
+     action.port = htons(out_port);
+     return make_packet_out(NULL, buffer_id, in_port,
+                            (struct ofp_action_header *) &action, 1);
+ }
+
+ /* Creates and returns an OFPT_ECHO_REQUEST message with an empty payload. */
+ struct ofpbuf *
+ make_echo_request(void)
+ {
+     struct ofp_header *rq;
+     struct ofpbuf *out = ofpbuf_new(sizeof *rq);
+     rq = ofpbuf_put_uninit(out, sizeof *rq);
+     rq->version = OFP_VERSION;
+     rq->type = OFPT_ECHO_REQUEST;
+     rq->length = htons(sizeof *rq);
+     rq->xid = 0;
+     return out;
+ }
+
+ /* Creates and returns an OFPT_ECHO_REPLY message matching the
+  * OFPT_ECHO_REQUEST message in 'rq'. */
+ struct ofpbuf *
+ make_echo_reply(const struct ofp_header *rq)
+ {
+     size_t size = ntohs(rq->length);
+     struct ofpbuf *out = ofpbuf_new(size);
+     struct ofp_header *reply = ofpbuf_put(out, rq, size);
+     reply->type = OFPT_ECHO_REPLY;
+     return out;
+ }
+
+ static int
+ check_message_type(uint8_t got_type, uint8_t want_type)
+ {
+     if (got_type != want_type) {
+         char *want_type_name = ofp_message_type_to_string(want_type);
+         char *got_type_name = ofp_message_type_to_string(got_type);
+         VLOG_WARN_RL(&bad_ofmsg_rl,
+                      "received bad message type %s (expected %s)",
+                      got_type_name, want_type_name);
+         free(want_type_name);
+         free(got_type_name);
+         return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_TYPE);
+     }
+     return 0;
+ }
+
+ /* Checks that 'msg' has type 'type' and that it is exactly 'size' bytes long.
+  * Returns 0 if the checks pass, otherwise an OpenFlow error code (produced
+  * with ofp_mkerr()).
*/ + int + check_ofp_message(const struct ofp_header *msg, uint8_t type, size_t size) + { + size_t got_size; + int error; + + error = check_message_type(msg->type, type); + if (error) { + return error; + } + + got_size = ntohs(msg->length); + if (got_size != size) { + char *type_name = ofp_message_type_to_string(type); + VLOG_WARN_RL(&bad_ofmsg_rl, + "received %s message of length %zu (expected %zu)", + type_name, got_size, size); + free(type_name); + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); + } + + return 0; + } + + /* Checks that 'msg' has type 'type' and that 'msg' is 'size' plus a + * nonnegative integer multiple of 'array_elt_size' bytes long. Returns 0 if + * the checks pass, otherwise an OpenFlow error code (produced with + * ofp_mkerr()). + * + * If 'n_array_elts' is nonnull, then '*n_array_elts' is set to the number of + * 'array_elt_size' blocks in 'msg' past the first 'min_size' bytes, when + * successful. */ + int + check_ofp_message_array(const struct ofp_header *msg, uint8_t type, + size_t min_size, size_t array_elt_size, + size_t *n_array_elts) + { + size_t got_size; + int error; + + assert(array_elt_size); + + error = check_message_type(msg->type, type); + if (error) { + return error; + } + + got_size = ntohs(msg->length); + if (got_size < min_size) { + char *type_name = ofp_message_type_to_string(type); + VLOG_WARN_RL(&bad_ofmsg_rl, "received %s message of length %zu " + "(expected at least %zu)", + type_name, got_size, min_size); + free(type_name); + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); + } + if ((got_size - min_size) % array_elt_size) { + char *type_name = ofp_message_type_to_string(type); + VLOG_WARN_RL(&bad_ofmsg_rl, + "received %s message of bad length %zu: the " + "excess over %zu (%zu) is not evenly divisible by %zu " + "(remainder is %zu)", + type_name, got_size, min_size, got_size - min_size, + array_elt_size, (got_size - min_size) % array_elt_size); + free(type_name); + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); + } + if (n_array_elts) { + *n_array_elts = (got_size - min_size) / array_elt_size; + } + return 0; + } + + int + check_ofp_packet_out(const struct ofp_header *oh, struct ofpbuf *data, + int *n_actionsp, int max_ports) + { + const struct ofp_packet_out *opo; + unsigned int actions_len, n_actions; + size_t extra; + int error; + + *n_actionsp = 0; + error = check_ofp_message_array(oh, OFPT_PACKET_OUT, + sizeof *opo, 1, &extra); + if (error) { + return error; + } + opo = (const struct ofp_packet_out *) oh; + + actions_len = ntohs(opo->actions_len); + if (actions_len > extra) { + VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %u bytes of actions " + "but message has room for only %zu bytes", + actions_len, extra); + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); + } + if (actions_len % sizeof(union ofp_action)) { + VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %u bytes of actions, " + "which is not a multiple of %zu", + actions_len, sizeof(union ofp_action)); + return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN); + } + + n_actions = actions_len / sizeof(union ofp_action); + error = validate_actions((const union ofp_action *) opo->actions, + n_actions, max_ports); + if (error) { + return error; + } + + data->data = (void *) &opo->actions[n_actions]; + data->size = extra - actions_len; + *n_actionsp = n_actions; + return 0; + } + + const struct ofp_flow_stats * + flow_stats_first(struct flow_stats_iterator *iter, + const struct ofp_stats_reply *osr) + { + iter->pos = osr->body; + iter->end = osr->body + 
(ntohs(osr->header.length) + - offsetof(struct ofp_stats_reply, body)); + return flow_stats_next(iter); + } + + const struct ofp_flow_stats * + flow_stats_next(struct flow_stats_iterator *iter) + { + ptrdiff_t bytes_left = iter->end - iter->pos; + const struct ofp_flow_stats *fs; + size_t length; + + if (bytes_left < sizeof *fs) { + if (bytes_left != 0) { + VLOG_WARN_RL(&bad_ofmsg_rl, + "%td leftover bytes in flow stats reply", bytes_left); + } + return NULL; + } + + fs = (const void *) iter->pos; + length = ntohs(fs->length); + if (length < sizeof *fs) { + VLOG_WARN_RL(&bad_ofmsg_rl, "flow stats length %zu is shorter than " + "min %zu", length, sizeof *fs); + return NULL; + } else if (length > bytes_left) { + VLOG_WARN_RL(&bad_ofmsg_rl, "flow stats length %zu but only %td " + "bytes left", length, bytes_left); + return NULL; + } else if ((length - sizeof *fs) % sizeof fs->actions[0]) { + VLOG_WARN_RL(&bad_ofmsg_rl, "flow stats length %zu has %zu bytes " + "left over in final action", length, + (length - sizeof *fs) % sizeof fs->actions[0]); + return NULL; + } + iter->pos += length; + return fs; + } + + /* Alignment of ofp_actions. */ + #define ACTION_ALIGNMENT 8 + + static int + check_action_exact_len(const union ofp_action *a, unsigned int len, + unsigned int required_len) + { + if (len != required_len) { + VLOG_DBG_RL(&bad_ofmsg_rl, + "action %u has invalid length %"PRIu16" (must be %u)\n", + a->type, ntohs(a->header.len), required_len); + return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN); + } + return 0; + } + + /* Checks that 'port' is a valid output port for the OFPAT_OUTPUT action, given + * that the switch will never have more than 'max_ports' ports. Returns 0 if + * 'port' is valid, otherwise an ofp_mkerr() return code. */ + static int + check_output_port(uint16_t port, int max_ports) + { + switch (port) { + case OFPP_IN_PORT: + case OFPP_TABLE: + case OFPP_NORMAL: + case OFPP_FLOOD: + case OFPP_ALL: + case OFPP_CONTROLLER: + case OFPP_LOCAL: + return 0; + + default: + if (port < max_ports) { + return 0; + } + VLOG_WARN_RL(&bad_ofmsg_rl, "unknown output port %x", port); + return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_OUT_PORT); + } + } + + /* Checks that 'action' is a valid OFPAT_ENQUEUE action, given that the switch + * will never have more than 'max_ports' ports. Returns 0 if 'port' is valid, + * otherwise an ofp_mkerr() return code. 
+ static int
+ check_enqueue_action(const union ofp_action *a, unsigned int len,
+                      int max_ports)
+ {
+     const struct ofp_action_enqueue *oae;
+     uint16_t port;
+     int error;
+
+     error = check_action_exact_len(a, len, 16);
+     if (error) {
+         return error;
+     }
+
+     oae = (const struct ofp_action_enqueue *) a;
+     port = ntohs(oae->port);
+     if (port < max_ports || port == OFPP_IN_PORT) {
+         return 0;
+     }
+     VLOG_WARN_RL(&bad_ofmsg_rl, "unknown enqueue port %x", port);
+     return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_OUT_PORT);
+ }
+
+ static int
+ check_nicira_action(const union ofp_action *a, unsigned int len)
+ {
+     const struct nx_action_header *nah;
+
+     if (len < 16) {
+         VLOG_DBG_RL(&bad_ofmsg_rl,
+                     "Nicira vendor action only %u bytes", len);
+         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN);
+     }
+     nah = (const struct nx_action_header *) a;
+
+     switch (ntohs(nah->subtype)) {
+     case NXAST_RESUBMIT:
+     case NXAST_SET_TUNNEL:
+         return check_action_exact_len(a, len, 16);
+     default:
+         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE);
+     }
+ }
+
+ static int
+ check_action(const union ofp_action *a, unsigned int len, int max_ports)
+ {
+     int error;
+
+     switch (ntohs(a->type)) {
+     case OFPAT_OUTPUT:
+         error = check_action_exact_len(a, len, 8);
+         if (error) {
+             return error;
+         }
+         return check_output_port(ntohs(a->output.port), max_ports);
+
+     case OFPAT_SET_VLAN_VID:
+     case OFPAT_SET_VLAN_PCP:
+     case OFPAT_STRIP_VLAN:
+     case OFPAT_SET_NW_SRC:
+     case OFPAT_SET_NW_DST:
+     case OFPAT_SET_NW_TOS:
+     case OFPAT_SET_TP_SRC:
+     case OFPAT_SET_TP_DST:
+         return check_action_exact_len(a, len, 8);
+
+     case OFPAT_SET_DL_SRC:
+     case OFPAT_SET_DL_DST:
+         return check_action_exact_len(a, len, 16);
+
+     case OFPAT_VENDOR:
+         return (a->vendor.vendor == htonl(NX_VENDOR_ID)
+                 ? check_nicira_action(a, len)
+                 : ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR));
+
+     case OFPAT_ENQUEUE:
+         return check_enqueue_action(a, len, max_ports);
+
+     default:
+         VLOG_WARN_RL(&bad_ofmsg_rl, "unknown action type %"PRIu16,
+                      ntohs(a->type));
+         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_TYPE);
+     }
+ }
+
+ int
+ validate_actions(const union ofp_action *actions, size_t n_actions,
+                  int max_ports)
+ {
+     const union ofp_action *a;
+
+     for (a = actions; a < &actions[n_actions]; ) {
+         unsigned int len = ntohs(a->header.len);
+         unsigned int n_slots = len / ACTION_ALIGNMENT;
+         unsigned int slots_left = &actions[n_actions] - a;
+         int error;
+
+         if (n_slots > slots_left) {
+             VLOG_DBG_RL(&bad_ofmsg_rl,
+                         "action requires %u slots but only %u remain",
+                         n_slots, slots_left);
+             return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN);
+         } else if (!len) {
+             VLOG_DBG_RL(&bad_ofmsg_rl, "action has invalid length 0");
+             return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN);
+         } else if (len % ACTION_ALIGNMENT) {
+             VLOG_DBG_RL(&bad_ofmsg_rl, "action length %u is not a multiple "
+                         "of %d", len, ACTION_ALIGNMENT);
+             return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN);
+         }
+
+         error = check_action(a, len, max_ports);
+         if (error) {
+             return error;
+         }
+         a += n_slots;
+     }
+     return 0;
+ }
+
+ /* Returns true if 'action' outputs to 'port' (which must be in network byte
+  * order), false otherwise. */
+ bool
+ action_outputs_to_port(const union ofp_action *action, uint16_t port)
+ {
+     switch (ntohs(action->type)) {
+     case OFPAT_OUTPUT:
+         return action->output.port == port;
+     case OFPAT_ENQUEUE:
+         return ((const struct ofp_action_enqueue *) action)->port == port;
+     default:
+         return false;
+     }
+ }
+
+ /* The set of actions must either come from a trusted source or have been
+  * previously validated with validate_actions(). */
+ const union ofp_action *
+ actions_first(struct actions_iterator *iter,
+               const union ofp_action *oa, size_t n_actions)
+ {
+     iter->pos = oa;
+     iter->end = oa + n_actions;
+     return actions_next(iter);
+ }
+
+ const union ofp_action *
+ actions_next(struct actions_iterator *iter)
+ {
+     if (iter->pos < iter->end) {
+         const union ofp_action *a = iter->pos;
+         unsigned int len = ntohs(a->header.len);
+         iter->pos += len / ACTION_ALIGNMENT;
+         return a;
+     } else {
+         return NULL;
+     }
+ }
+
+ void
+ normalize_match(struct ofp_match *m)
+ {
+     enum { OFPFW_NW = OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO };
+     enum { OFPFW_TP = OFPFW_TP_SRC | OFPFW_TP_DST };
+     uint32_t wc;
+
+     wc = ntohl(m->wildcards) & OVSFW_ALL;
+     if (wc & OFPFW_DL_TYPE) {
+         m->dl_type = 0;
+
+         /* Can't sensibly match on network or transport headers if the
+          * data link type is unknown. */
+         wc |= OFPFW_NW | OFPFW_TP;
+         m->nw_src = m->nw_dst = m->nw_proto = 0;
+         m->tp_src = m->tp_dst = 0;
+     } else if (m->dl_type == htons(ETH_TYPE_IP)) {
+         if (wc & OFPFW_NW_PROTO) {
+             m->nw_proto = 0;
+
+             /* Can't sensibly match on transport headers if the network
+              * protocol is unknown. */
+             wc |= OFPFW_TP;
+             m->tp_src = m->tp_dst = 0;
+         } else if (m->nw_proto == IPPROTO_TCP ||
+                    m->nw_proto == IPPROTO_UDP ||
+                    m->nw_proto == IPPROTO_ICMP) {
+             if (wc & OFPFW_TP_SRC) {
+                 m->tp_src = 0;
+             }
+             if (wc & OFPFW_TP_DST) {
+                 m->tp_dst = 0;
+             }
+         } else {
+             /* Transport layer fields will always be extracted as zeros, so we
+              * can do an exact-match on those values. */
+             wc &= ~OFPFW_TP;
+             m->tp_src = m->tp_dst = 0;
+         }
+         if (wc & OFPFW_NW_SRC_MASK) {
+             m->nw_src &= flow_nw_bits_to_mask(wc, OFPFW_NW_SRC_SHIFT);
+         }
+         if (wc & OFPFW_NW_DST_MASK) {
+             m->nw_dst &= flow_nw_bits_to_mask(wc, OFPFW_NW_DST_SHIFT);
+         }
+     } else if (m->dl_type == htons(ETH_TYPE_ARP)) {
+         if (wc & OFPFW_NW_PROTO) {
+             m->nw_proto = 0;
+         }
+         if (wc & OFPFW_NW_SRC_MASK) {
+             m->nw_src &= flow_nw_bits_to_mask(wc, OFPFW_NW_SRC_SHIFT);
+         }
+         if (wc & OFPFW_NW_DST_MASK) {
+             m->nw_dst &= flow_nw_bits_to_mask(wc, OFPFW_NW_DST_SHIFT);
+         }
+         m->tp_src = m->tp_dst = 0;
+     } else {
+         /* Network and transport layer fields will always be extracted as
+          * zeros, so we can do an exact-match on those values. */
+         wc &= ~(OFPFW_NW | OFPFW_TP);
+         m->nw_proto = m->nw_src = m->nw_dst = 0;
+         m->tp_src = m->tp_dst = 0;
+     }
+     if (wc & OFPFW_DL_SRC) {
+         memset(m->dl_src, 0, sizeof m->dl_src);
+     }
+     if (wc & OFPFW_DL_DST) {
+         memset(m->dl_dst, 0, sizeof m->dl_dst);
+     }
+     m->wildcards = htonl(wc);
+ }
+
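The iterators above are the intended way to walk a flow-stats reply.  A
minimal usage sketch, not part of the patch: the handler name and printf
reporting are illustrative only, and 'osr' is assumed to point to a reply
that already passed check_ofp_message_array():

    /* Walk every ofp_flow_stats entry in a validated OFPST_FLOW reply. */
    static void
    log_flow_stats(const struct ofp_stats_reply *osr)
    {
        struct flow_stats_iterator iter;
        const struct ofp_flow_stats *fs;

        for (fs = flow_stats_first(&iter, osr); fs;
             fs = flow_stats_next(&iter)) {
            /* Each entry is 'fs->length' bytes: the fixed header plus a
             * whole number of actions. */
            printf("table %"PRIu8": %"PRIu64" packets\n",
                   fs->table_id, ntohll(fs->packet_count));
        }
    }
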
diff --cc lib/vlog-modules.def
index fe8a5f241,63760eb57..2513f9310
--- a/lib/vlog-modules.def
+++ b/lib/vlog-modules.def
@@@ -47,60 -47,48 +44,53 @@@ VLOG_MODULE(netdev_vport
 VLOG_MODULE(netflow)
 VLOG_MODULE(netlink)
 VLOG_MODULE(ofctl)
+ VLOG_MODULE(ofp_util)
-VLOG_MODULE(ovs_discover)
 VLOG_MODULE(ofproto)
 VLOG_MODULE(openflowd)
+VLOG_MODULE(ovs_discover)
 VLOG_MODULE(ovsdb_client)
 VLOG_MODULE(ovsdb_error)
 VLOG_MODULE(ovsdb_file)
 VLOG_MODULE(ovsdb_idl)
-VLOG_MODULE(ovsdb_log)
 VLOG_MODULE(ovsdb_jsonrpc_server)
+VLOG_MODULE(ovsdb_log)
 VLOG_MODULE(ovsdb_server)
 VLOG_MODULE(ovsdb_tool)
-VLOG_MODULE(pktbuf)
 VLOG_MODULE(pcap)
+VLOG_MODULE(pktbuf)
 VLOG_MODULE(poll_loop)
- VLOG_MODULE(port_watcher)
 VLOG_MODULE(proc_net_compat)
 VLOG_MODULE(process)
 VLOG_MODULE(rconn)
 VLOG_MODULE(reconnect)
 VLOG_MODULE(rtnetlink)
 VLOG_MODULE(sflow)
+VLOG_MODULE(socket_util)
- VLOG_MODULE(stats)
+VLOG_MODULE(status)
 VLOG_MODULE(stp)
+VLOG_MODULE(stream)
 VLOG_MODULE(stream_fd)
 VLOG_MODULE(stream_ssl)
 VLOG_MODULE(stream_tcp)
 VLOG_MODULE(stream_unix)
-VLOG_MODULE(stream)
-VLOG_MODULE(status)
 VLOG_MODULE(svec)
- VLOG_MODULE(switch)
+VLOG_MODULE(switchui)
 VLOG_MODULE(terminal)
 VLOG_MODULE(timeval)
 VLOG_MODULE(tty)
-VLOG_MODULE(socket_util)
-VLOG_MODULE(switchui)
 VLOG_MODULE(unixctl)
 VLOG_MODULE(util)
-VLOG_MODULE(vconn_stream)
 VLOG_MODULE(vconn)
- VLOG_MODULE(vconn_ssl)
-VLOG_MODULE(vsctl)
+VLOG_MODULE(vconn_stream)
- VLOG_MODULE(vconn_tcp)
- VLOG_MODULE(vconn_unix)
 VLOG_MODULE(vlog)
+VLOG_MODULE(vsctl)
 VLOG_MODULE(vswitchd)
 VLOG_MODULE(vt)
- VLOG_MODULE(wcelim)
+VLOG_MODULE(wdp)
+VLOG_MODULE(wdp_xflow)
 VLOG_MODULE(xenserver)
- VLOG_MODULE(xenserverd)
+VLOG_MODULE(xfif)
+VLOG_MODULE(xfif_linux)
+VLOG_MODULE(xfif_netdev)
 #undef VLOG_MODULE
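Each VLOG_MODULE(name) entry above expands into a VLM_name enumeration
constant; a source file picks its module by defining THIS_MODULE before
using the logging macros, as the new xfif-netdev.c below does.  A minimal
sketch of the pattern (the function and message are illustrative only):

    #include "vlog.h"
    #define THIS_MODULE VLM_xfif_netdev

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    static void
    example(void)
    {
        /* Logged under the xfif_netdev module, rate-limited. */
        VLOG_WARN_RL(&rl, "something unexpected happened");
    }
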
diff --cc lib/xfif-netdev.c
index 0eadbcc5a,000000000..1b6cf9936
mode 100644,000000..100644
--- a/lib/xfif-netdev.c
+++ b/lib/xfif-netdev.c
@@@ -1,1379 -1,0 +1,1379 @@@
+/*
+ * Copyright (c) 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include "xfif.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "csum.h"
+#include "flow.h"
+#include "hmap.h"
+#include "list.h"
+#include "netdev.h"
+#include "xflow-util.h"
+#include "ofp-print.h"
+#include "ofpbuf.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "queue.h"
+#include "timeval.h"
+#include "util.h"
+#include "xfif-provider.h"
+
+#include "vlog.h"
+#define THIS_MODULE VLM_xfif_netdev
+
+/* Configuration parameters. */
+enum { N_QUEUES = 2 };          /* Number of queues for xfif_recv(). */
+enum { MAX_QUEUE_LEN = 100 };   /* Maximum number of packets per queue. */
+enum { N_GROUPS = 16 };         /* Number of port groups. */
+enum { MAX_PORTS = 256 };       /* Maximum number of ports. */
+enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
+
+/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
+ * headers to be aligned on a 4-byte boundary. */
+enum { XF_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
+
+/* Datapath based on the network device interface from netdev.h. */
+struct xf_netdev {
+    struct list node;
+    int xf_idx;
+    int open_cnt;
+    bool destroyed;
+
+    bool drop_frags;            /* Drop all IP fragments, if true. */
+    struct ovs_queue queues[N_QUEUES]; /* Messages queued for xfif_recv(). */
+    struct hmap flow_table;     /* Flow table. */
+    struct xflow_port_group groups[N_GROUPS];
+
+    /* Statistics. */
+    long long int n_frags;      /* Number of dropped IP fragments. */
+    long long int n_hit;        /* Number of flow table matches. */
+    long long int n_missed;     /* Number of flow table misses. */
+    long long int n_lost;       /* Number of misses not passed to client. */
+
+    /* Ports. */
+    int n_ports;
+    struct xf_netdev_port *ports[MAX_PORTS];
+    struct list port_list;
+    unsigned int serial;
+};
+
+/* A port in a netdev-based datapath. */
+struct xf_netdev_port {
+    int port_no;                /* Index into xf_netdev's 'ports'. */
+    struct list node;           /* Element in xf_netdev's 'port_list'. */
+    struct netdev *netdev;
+    bool internal;              /* Internal port (as XFLOW_PORT_INTERNAL)? */
+};
+
+/* A flow in xf_netdev's 'flow_table'. */
+struct xf_netdev_flow {
+    struct hmap_node node;      /* Element in xf_netdev's 'flow_table'. */
+    struct xflow_key key;
+
+    /* Statistics. */
-     struct timeval used;        /* Last used time, in milliseconds. */
++    struct timespec used;       /* Last used time. */
+    long long int packet_count; /* Number of packets matched. */
+    long long int byte_count;   /* Number of bytes matched. */
+    uint8_t ip_tos;             /* IP TOS value. */
+    uint16_t tcp_ctl;           /* Bitwise-OR of seen tcp_ctl values. */
+
+    /* Actions. */
+    union xflow_action *actions;
+    unsigned int n_actions;
+};
+
+/* Interface to netdev-based datapath. */
+struct xfif_netdev {
+    struct xfif xfif;
+    struct xf_netdev *xf;
+    int listen_mask;
+    unsigned int xf_serial;
+};
+
+/* All netdev-based datapaths. */
+static struct xf_netdev *xf_netdevs[256];
+struct list xf_netdev_list = LIST_INITIALIZER(&xf_netdev_list);
+enum { N_XF_NETDEVS = ARRAY_SIZE(xf_netdevs) };
+
+/* Maximum port MTU seen so far. */
+static int max_mtu = ETH_PAYLOAD_MAX;
+
+static int get_port_by_number(struct xf_netdev *, uint16_t port_no,
+                              struct xf_netdev_port **portp);
+static int get_port_by_name(struct xf_netdev *, const char *devname,
+                            struct xf_netdev_port **portp);
+static void xf_netdev_free(struct xf_netdev *);
+static void xf_netdev_flow_flush(struct xf_netdev *);
+static int do_add_port(struct xf_netdev *, const char *devname, uint16_t flags,
+                       uint16_t port_no);
+static int do_del_port(struct xf_netdev *, uint16_t port_no);
+static int xf_netdev_output_control(struct xf_netdev *, const struct ofpbuf *,
+                                    int queue_no, int port_no, uint32_t arg);
+static int xf_netdev_execute_actions(struct xf_netdev *,
+                                     struct ofpbuf *, struct xflow_key *,
+                                     const union xflow_action *, int n);
+
+static struct xfif_netdev *
+xfif_netdev_cast(const struct xfif *xfif)
+{
+    xfif_assert_class(xfif, &xfif_netdev_class);
+    return CONTAINER_OF(xfif, struct xfif_netdev, xfif);
+}
+
+static struct xf_netdev *
+get_xf_netdev(const struct xfif *xfif)
+{
+    return xfif_netdev_cast(xfif)->xf;
+}
+
+static int
+name_to_xf_idx(const char *name)
+{
+    if (!strncmp(name, "xf", 2) && isdigit((unsigned char)name[2])) {
+        int xf_idx = atoi(name + 2);
+        if (xf_idx >= 0 && xf_idx < N_XF_NETDEVS) {
+            return xf_idx;
+        }
+    }
+    return -1;
+}
+
+static struct xf_netdev *
+find_xf_netdev(const char *name)
+{
+    int xf_idx;
+    size_t i;
+
+    xf_idx = name_to_xf_idx(name);
+    if (xf_idx >= 0) {
+        return xf_netdevs[xf_idx];
+    }
+
+    for (i = 0; i < N_XF_NETDEVS; i++) {
+        struct xf_netdev *xf = xf_netdevs[i];
+        if (xf) {
+            struct xf_netdev_port *port;
+            if (!get_port_by_name(xf, name, &port)) {
+                return xf;
+            }
+        }
+    }
+    return NULL;
+}
+
+static struct xfif *
+create_xfif_netdev(struct xf_netdev *xf)
+{
+    struct xfif_netdev *xfif;
+    char *xfname;
+
+    xf->open_cnt++;
+
+    xfname = xasprintf("xf%d", xf->xf_idx);
+    xfif = xmalloc(sizeof *xfif);
+    xfif_init(&xfif->xfif, &xfif_netdev_class, xfname, xf->xf_idx, xf->xf_idx);
+    xfif->xf = xf;
+    xfif->listen_mask = 0;
+    xfif->xf_serial = xf->serial;
+    free(xfname);
+
+    return &xfif->xfif;
+}
+
+static int
+create_xf_netdev(const char *name, int xf_idx, struct xfif **xfifp)
+{
+    struct xf_netdev *xf;
+    int error;
+    int i;
+
+    if (xf_netdevs[xf_idx]) {
+        return EBUSY;
+    }
+
+    /* Create datapath. */
+    xf_netdevs[xf_idx] = xf = xzalloc(sizeof *xf);
+    list_push_back(&xf_netdev_list, &xf->node);
+    xf->xf_idx = xf_idx;
+    xf->open_cnt = 0;
+    xf->drop_frags = false;
+    for (i = 0; i < N_QUEUES; i++) {
+        queue_init(&xf->queues[i]);
+    }
+    hmap_init(&xf->flow_table);
+    for (i = 0; i < N_GROUPS; i++) {
+        xf->groups[i].ports = NULL;
+        xf->groups[i].n_ports = 0;
+        xf->groups[i].group = i;
+    }
+    list_init(&xf->port_list);
+    error = do_add_port(xf, name, XFLOW_PORT_INTERNAL, XFLOWP_LOCAL);
+    if (error) {
+        xf_netdev_free(xf);
+        return ENODEV;
+    }
+
+    *xfifp = create_xfif_netdev(xf);
+    return 0;
+}
+
+static int
+xfif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create,
+                 struct xfif **xfifp)
+{
+    if (create) {
+        if (find_xf_netdev(name)) {
+            return EEXIST;
+        } else {
+            int xf_idx = name_to_xf_idx(name);
+            if (xf_idx >= 0) {
+                return create_xf_netdev(name, xf_idx, xfifp);
+            } else {
+                /* Scan for unused xf_idx number. */
+                for (xf_idx = 0; xf_idx < N_XF_NETDEVS; xf_idx++) {
+                    int error = create_xf_netdev(name, xf_idx, xfifp);
+                    if (error != EBUSY) {
+                        return error;
+                    }
+                }
+
+                /* All datapath numbers in use. */
+                return ENOBUFS;
+            }
+        }
+    } else {
+        struct xf_netdev *xf = find_xf_netdev(name);
+        if (xf) {
+            *xfifp = create_xfif_netdev(xf);
+            return 0;
+        } else {
+            return ENODEV;
+        }
+    }
+}
+
+static void
+xf_netdev_free(struct xf_netdev *xf)
+{
+    int i;
+
+    xf_netdev_flow_flush(xf);
+    while (xf->n_ports > 0) {
+        struct xf_netdev_port *port = CONTAINER_OF(
+            xf->port_list.next, struct xf_netdev_port, node);
+        do_del_port(xf, port->port_no);
+    }
+    for (i = 0; i < N_QUEUES; i++) {
+        queue_destroy(&xf->queues[i]);
+    }
+    hmap_destroy(&xf->flow_table);
+    for (i = 0; i < N_GROUPS; i++) {
+        free(xf->groups[i].ports);
+    }
+    xf_netdevs[xf->xf_idx] = NULL;
+    list_remove(&xf->node);
+    free(xf);
+}
+
+static void
+xfif_netdev_close(struct xfif *xfif)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    assert(xf->open_cnt > 0);
+    if (--xf->open_cnt == 0 && xf->destroyed) {
+        xf_netdev_free(xf);
+    }
+    free(xfif);
+}
+
+static int
+xfif_netdev_destroy(struct xfif *xfif)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    xf->destroyed = true;
+    return 0;
+}
+
+static int
+xfif_netdev_get_stats(const struct xfif *xfif, struct xflow_stats *stats)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    memset(stats, 0, sizeof *stats);
+    stats->n_flows = hmap_count(&xf->flow_table);
+    stats->cur_capacity = hmap_capacity(&xf->flow_table);
+    stats->max_capacity = MAX_FLOWS;
+    stats->n_ports = xf->n_ports;
+    stats->max_ports = MAX_PORTS;
+    stats->max_groups = N_GROUPS;
+    stats->n_frags = xf->n_frags;
+    stats->n_hit = xf->n_hit;
+    stats->n_missed = xf->n_missed;
+    stats->n_lost = xf->n_lost;
+    stats->max_miss_queue = MAX_QUEUE_LEN;
+    stats->max_action_queue = MAX_QUEUE_LEN;
+    return 0;
+}
+
+static int
+xfif_netdev_get_drop_frags(const struct xfif *xfif, bool *drop_fragsp)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    *drop_fragsp = xf->drop_frags;
+    return 0;
+}
+
+static int
+xfif_netdev_set_drop_frags(struct xfif *xfif, bool drop_frags)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    xf->drop_frags = drop_frags;
+    return 0;
+}
+
+static int
+do_add_port(struct xf_netdev *xf, const char *devname, uint16_t flags,
+            uint16_t port_no)
+{
+    bool internal = (flags & XFLOW_PORT_INTERNAL) != 0;
+    struct xf_netdev_port *port;
+    struct netdev_options netdev_options;
+    struct netdev *netdev;
+    int mtu;
+    int error;
+
+    /* XXX reject devices already in some xf_netdev. */
+
+    /* Open and validate network device. */
+    memset(&netdev_options, 0, sizeof netdev_options);
+    netdev_options.name = devname;
+    netdev_options.ethertype = NETDEV_ETH_TYPE_ANY;
+    if (internal) {
+        netdev_options.type = "tap";
+    }
+
+    error = netdev_open(&netdev_options, &netdev);
+    if (error) {
+        return error;
+    }
+    /* XXX reject loopback devices */
+    /* XXX reject non-Ethernet devices */
+
+    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
+    if (error) {
+        netdev_close(netdev);
+        return error;
+    }
+
+    port = xmalloc(sizeof *port);
+    port->port_no = port_no;
+    port->netdev = netdev;
+    port->internal = internal;
+
+    netdev_get_mtu(netdev, &mtu);
+    if (mtu > max_mtu) {
+        max_mtu = mtu;
+    }
+
+    list_push_back(&xf->port_list, &port->node);
+    xf->ports[port_no] = port;
+    xf->n_ports++;
+    xf->serial++;
+
+    return 0;
+}
+
+static int
+xfif_netdev_port_add(struct xfif *xfif, const char *devname, uint16_t flags,
+                     uint16_t *port_nop)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    int port_no;
+
+    for (port_no = 0; port_no < MAX_PORTS; port_no++) {
+        if (!xf->ports[port_no]) {
+            *port_nop = port_no;
+            return do_add_port(xf, devname, flags, port_no);
+        }
+    }
+    return EFBIG;
+}
+
+static int
+xfif_netdev_port_del(struct xfif *xfif, uint16_t port_no)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    return port_no == XFLOWP_LOCAL ? EINVAL : do_del_port(xf, port_no);
+}
+
+static bool
+is_valid_port_number(uint16_t port_no)
+{
+    return port_no < MAX_PORTS;
+}
+
+static int
+get_port_by_number(struct xf_netdev *xf,
+                   uint16_t port_no, struct xf_netdev_port **portp)
+{
+    if (!is_valid_port_number(port_no)) {
+        *portp = NULL;
+        return EINVAL;
+    } else {
+        *portp = xf->ports[port_no];
+        return *portp ? 0 : ENOENT;
+    }
+}
+
+static int
+get_port_by_name(struct xf_netdev *xf,
+                 const char *devname, struct xf_netdev_port **portp)
+{
+    struct xf_netdev_port *port;
+
+    LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+        if (!strcmp(netdev_get_name(port->netdev), devname)) {
+            *portp = port;
+            return 0;
+        }
+    }
+    return ENOENT;
+}
+
+static int
+do_del_port(struct xf_netdev *xf, uint16_t port_no)
+{
+    struct xf_netdev_port *port;
+    char *name;
+    int error;
+
+    error = get_port_by_number(xf, port_no, &port);
+    if (error) {
+        return error;
+    }
+
+    list_remove(&port->node);
+    xf->ports[port->port_no] = NULL;
+    xf->n_ports--;
+    xf->serial++;
+
+    name = xstrdup(netdev_get_name(port->netdev));
+    netdev_close(port->netdev);
+
+    free(name);
+    free(port);
+
+    return 0;
+}
+
+static void
+answer_port_query(const struct xf_netdev_port *port, struct xflow_port *xflow_port)
+{
+    memset(xflow_port, 0, sizeof *xflow_port);
+    ovs_strlcpy(xflow_port->devname, netdev_get_name(port->netdev),
+                sizeof xflow_port->devname);
+    xflow_port->port = port->port_no;
+    xflow_port->flags = port->internal ? XFLOW_PORT_INTERNAL : 0;
+}
+
+static int
+xfif_netdev_port_query_by_number(const struct xfif *xfif, uint16_t port_no,
+                                 struct xflow_port *xflow_port)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_port *port;
+    int error;
+
+    error = get_port_by_number(xf, port_no, &port);
+    if (!error) {
+        answer_port_query(port, xflow_port);
+    }
+    return error;
+}
+
+static int
+xfif_netdev_port_query_by_name(const struct xfif *xfif, const char *devname,
+                               struct xflow_port *xflow_port)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_port *port;
+    int error;
+
+    error = get_port_by_name(xf, devname, &port);
+    if (!error) {
+        answer_port_query(port, xflow_port);
+    }
+    return error;
+}
+
+static void
+xf_netdev_free_flow(struct xf_netdev *xf, struct xf_netdev_flow *flow)
+{
+    hmap_remove(&xf->flow_table, &flow->node);
+    free(flow->actions);
+    free(flow);
+}
+
+static void
+xf_netdev_flow_flush(struct xf_netdev *xf)
+{
+    struct xf_netdev_flow *flow, *next;
+
+    HMAP_FOR_EACH_SAFE (flow, next, struct xf_netdev_flow, node,
+                        &xf->flow_table) {
+        xf_netdev_free_flow(xf, flow);
+    }
+}
+
+static int
+xfif_netdev_flow_flush(struct xfif *xfif)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    xf_netdev_flow_flush(xf);
+    return 0;
+}
+
+static int
+xfif_netdev_port_list(const struct xfif *xfif, struct xflow_port *ports, int n)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_port *port;
+    int i;
+
+    i = 0;
+    LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+        struct xflow_port *xflow_port = &ports[i];
+        if (i >= n) {
+            break;
+        }
+        answer_port_query(port, xflow_port);
+        i++;
+    }
+    return xf->n_ports;
+}
+
+static int
+xfif_netdev_port_poll(const struct xfif *xfif_, char **devnamep OVS_UNUSED)
+{
+    struct xfif_netdev *xfif = xfif_netdev_cast(xfif_);
+    if (xfif->xf_serial != xfif->xf->serial) {
+        xfif->xf_serial = xfif->xf->serial;
+        return ENOBUFS;
+    } else {
+        return EAGAIN;
+    }
+}
+
+static void
+xfif_netdev_port_poll_wait(const struct xfif *xfif_)
+{
+    struct xfif_netdev *xfif = xfif_netdev_cast(xfif_);
+    if (xfif->xf_serial != xfif->xf->serial) {
+        poll_immediate_wake();
+    }
+}
+
+static int
+get_port_group(const struct xfif *xfif, int group_no,
+               struct xflow_port_group **groupp)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+
+    if (group_no >= 0 && group_no < N_GROUPS) {
+        *groupp = &xf->groups[group_no];
+        return 0;
+    } else {
+        *groupp = NULL;
+        return EINVAL;
+    }
+}
+
+static int
+xfif_netdev_port_group_get(const struct xfif *xfif, int group_no,
+                           uint16_t ports[], int n)
+{
+    struct xflow_port_group *group;
+    int error;
+
+    if (n < 0) {
+        return -EINVAL;
+    }
+
+    error = get_port_group(xfif, group_no, &group);
+    if (!error) {
+        memcpy(ports, group->ports, MIN(n, group->n_ports) * sizeof *ports);
+        return group->n_ports;
+    } else {
+        return -error;
+    }
+}
+
+static int
+xfif_netdev_port_group_set(struct xfif *xfif, int group_no,
+                           const uint16_t ports[], int n)
+{
+    struct xflow_port_group *group;
+    int error;
+
+    if (n < 0 || n > MAX_PORTS) {
+        return EINVAL;
+    }
+
+    error = get_port_group(xfif, group_no, &group);
+    if (!error) {
+        free(group->ports);
+        group->ports = xmemdup(ports, n * sizeof *group->ports);
+        group->n_ports = n;
+        group->group = group_no;
+    }
+    return error;
+}
+
+static struct xf_netdev_flow *
+xf_netdev_lookup_flow(const struct xf_netdev *xf,
+                      const struct xflow_key *key)
+{
+    struct xf_netdev_flow *flow;
+
+    HMAP_FOR_EACH_WITH_HASH (flow, struct xf_netdev_flow, node,
+                             xflow_key_hash(key, 0), &xf->flow_table) {
+        if (xflow_key_equal(&flow->key, key)) {
+            return flow;
+        }
+    }
+    return NULL;
+}
+
+static void
+answer_flow_query(struct xf_netdev_flow *flow, uint32_t query_flags,
+                  struct xflow_flow *xflow_flow)
+{
+    if (flow) {
+        xflow_flow->key = flow->key;
+        xflow_flow->stats.n_packets = flow->packet_count;
+        xflow_flow->stats.n_bytes = flow->byte_count;
+        xflow_flow->stats.used_sec = flow->used.tv_sec;
-         xflow_flow->stats.used_nsec = flow->used.tv_usec * 1000;
++        xflow_flow->stats.used_nsec = flow->used.tv_nsec;
+        xflow_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl);
+        xflow_flow->stats.ip_tos = flow->ip_tos;
+        xflow_flow->stats.error = 0;
+        if (xflow_flow->n_actions > 0) {
+            unsigned int n = MIN(xflow_flow->n_actions, flow->n_actions);
+            memcpy(xflow_flow->actions, flow->actions,
+                   n * sizeof *xflow_flow->actions);
+            xflow_flow->n_actions = flow->n_actions;
+        }
+
+        if (query_flags & XFLOWFF_ZERO_TCP_FLAGS) {
+            flow->tcp_ctl = 0;
+        }
+
+    } else {
+        xflow_flow->stats.error = ENOENT;
+    }
+}
+
+static int
+xfif_netdev_flow_get(const struct xfif *xfif, struct xflow_flow flows[], int n)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    int i;
+
+    for (i = 0; i < n; i++) {
+        struct xflow_flow *xflow_flow = &flows[i];
+        answer_flow_query(xf_netdev_lookup_flow(xf, &xflow_flow->key),
+                          xflow_flow->flags, xflow_flow);
+    }
+    return 0;
+}
+
+static int
+xfif_netdev_validate_actions(const union xflow_action *actions, int n_actions,
+                             bool *mutates)
+{
+    unsigned int i;
+
+    *mutates = false;
+    for (i = 0; i < n_actions; i++) {
+        const union xflow_action *a = &actions[i];
+        switch (a->type) {
+        case XFLOWAT_OUTPUT:
+            if (a->output.port >= MAX_PORTS) {
+                return EINVAL;
+            }
+            break;
+
+        case XFLOWAT_OUTPUT_GROUP:
+            *mutates = true;
+            if (a->output_group.group >= N_GROUPS) {
+                return EINVAL;
+            }
+            break;
+
+        case XFLOWAT_CONTROLLER:
+            break;
+
+        case XFLOWAT_SET_DL_TCI:
+            *mutates = true;
+            if (a->dl_tci.mask != htons(VLAN_VID_MASK)
+                && a->dl_tci.mask != htons(VLAN_PCP_MASK)
+                && a->dl_tci.mask != htons(VLAN_VID_MASK | VLAN_PCP_MASK)) {
+                return EINVAL;
+            }
+            if (a->dl_tci.tci & ~a->dl_tci.mask) {
+                return EINVAL;
+            }
+            break;
+
+        case XFLOWAT_SET_NW_TOS:
+            *mutates = true;
+            if (a->nw_tos.nw_tos & IP_ECN_MASK) {
+                return EINVAL;
+            }
+            break;
+
+        case XFLOWAT_STRIP_VLAN:
+        case XFLOWAT_SET_DL_SRC:
+        case XFLOWAT_SET_DL_DST:
+        case XFLOWAT_SET_NW_SRC:
+        case XFLOWAT_SET_NW_DST:
+        case XFLOWAT_SET_TP_SRC:
+        case XFLOWAT_SET_TP_DST:
+            *mutates = true;
+            break;
+
+        default:
+            return EOPNOTSUPP;
+        }
+    }
+    return 0;
+}
+
+static int
+set_flow_actions(struct xf_netdev_flow *flow, struct xflow_flow *xflow_flow)
+{
+    size_t n_bytes;
+    bool mutates;
+    int error;
+
+    if (xflow_flow->n_actions >= 4096 / sizeof *xflow_flow->actions) {
+        return EINVAL;
+    }
+    error = xfif_netdev_validate_actions(xflow_flow->actions,
+                                         xflow_flow->n_actions, &mutates);
+    if (error) {
+        return error;
+    }
+
+    n_bytes = xflow_flow->n_actions * sizeof *flow->actions;
+    flow->actions = xrealloc(flow->actions, n_bytes);
+    flow->n_actions = xflow_flow->n_actions;
+    memcpy(flow->actions, xflow_flow->actions, n_bytes);
+    return 0;
+}
+
+static int
+add_flow(struct xfif *xfif, struct xflow_flow *xflow_flow)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_flow *flow;
+    int error;
+
+    flow = xzalloc(sizeof *flow);
+    flow->key = xflow_flow->key;
+
+    error = set_flow_actions(flow, xflow_flow);
+    if (error) {
+        free(flow);
+        return error;
+    }
+
+    hmap_insert(&xf->flow_table, &flow->node,
+                xflow_key_hash(&flow->key, 0));
+    return 0;
+}
+
+static void
+clear_stats(struct xf_netdev_flow *flow)
+{
+    flow->used.tv_sec = 0;
-     flow->used.tv_usec = 0;
++    flow->used.tv_nsec = 0;
+    flow->packet_count = 0;
+    flow->byte_count = 0;
+    flow->ip_tos = 0;
+    flow->tcp_ctl = 0;
+}
+
+static int
+xfif_netdev_flow_put(struct xfif *xfif, struct xflow_flow_put *put)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_flow *flow;
+
+    flow = xf_netdev_lookup_flow(xf, &put->flow.key);
+    if (!flow) {
+        if (put->flags & XFLOWPF_CREATE) {
+            if (hmap_count(&xf->flow_table) < MAX_FLOWS) {
+                return add_flow(xfif, &put->flow);
+            } else {
+                return EFBIG;
+            }
+        } else {
+            return ENOENT;
+        }
+    } else {
+        if (put->flags & XFLOWPF_MODIFY) {
+            int error = set_flow_actions(flow, &put->flow);
+            if (!error && put->flags & XFLOWPF_ZERO_STATS) {
+                clear_stats(flow);
+            }
+            return error;
+        } else {
+            return EEXIST;
+        }
+    }
+}
+
+
+static int
+xfif_netdev_flow_del(struct xfif *xfif, struct xflow_flow *xflow_flow)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_flow *flow;
+
+    flow = xf_netdev_lookup_flow(xf, &xflow_flow->key);
+    if (flow) {
+        answer_flow_query(flow, 0, xflow_flow);
+        xf_netdev_free_flow(xf, flow);
+        return 0;
+    } else {
+        return ENOENT;
+    }
+}
+
+static int
+xfif_netdev_flow_list(const struct xfif *xfif, struct xflow_flow flows[], int n)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct xf_netdev_flow *flow;
+    int i;
+
+    i = 0;
+    HMAP_FOR_EACH (flow, struct xf_netdev_flow, node, &xf->flow_table) {
+        if (i >= n) {
+            break;
+        }
+        answer_flow_query(flow, 0, &flows[i++]);
+    }
+    return hmap_count(&xf->flow_table);
+}
+
+static int
+xfif_netdev_execute(struct xfif *xfif, uint16_t in_port,
+                    const union xflow_action actions[], int n_actions,
+                    const struct ofpbuf *packet)
+{
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    struct ofpbuf copy;
+    bool mutates;
+    struct xflow_key key;
+    flow_t flow;
+    int error;
+
+    if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) {
+        return EINVAL;
+    }
+
+    error = xfif_netdev_validate_actions(actions, n_actions, &mutates);
+    if (error) {
+        return error;
+    }
+
+    if (mutates) {
+        /* We need a deep copy of 'packet' since we're going to modify its
+         * data. */
+        ofpbuf_init(&copy, XF_NETDEV_HEADROOM + packet->size);
+        copy.data = (char*)copy.base + XF_NETDEV_HEADROOM;
+        ofpbuf_put(&copy, packet->data, packet->size);
+    } else {
+        /* We still need a shallow copy of 'packet', even though we won't
+         * modify its data, because flow_extract() modifies packet->l2, etc.
+         * We could probably get away with modifying those but it's more polite
+         * if we don't. */
+        copy = *packet;
+    }
+    flow_extract(&copy, 0, in_port, &flow);
+    xflow_key_from_flow(&key, &flow);
+    error = xf_netdev_execute_actions(xf, &copy, &key, actions, n_actions);
+    if (mutates) {
+        ofpbuf_uninit(&copy);
+    }
+    return error;
+}
+
+static int
+xfif_netdev_recv_get_mask(const struct xfif *xfif, int *listen_mask)
+{
+    struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+    *listen_mask = xfif_netdev->listen_mask;
+    return 0;
+}
+
+static int
+xfif_netdev_recv_set_mask(struct xfif *xfif, int listen_mask)
+{
+    struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+    if (!(listen_mask & ~XFLOWL_ALL)) {
+        xfif_netdev->listen_mask = listen_mask;
+        return 0;
+    } else {
+        return EINVAL;
+    }
+}
+
+static struct ovs_queue *
+find_nonempty_queue(struct xfif *xfif)
+{
+    struct xfif_netdev *xfif_netdev = xfif_netdev_cast(xfif);
+    struct xf_netdev *xf = get_xf_netdev(xfif);
+    int mask = xfif_netdev->listen_mask;
+    int i;
+
+    for (i = 0; i < N_QUEUES; i++) {
+        struct ovs_queue *q = &xf->queues[i];
+        if (q->n && mask & (1u << i)) {
+            return q;
+        }
+    }
+    return NULL;
+}
+
+static int
+xfif_netdev_recv(struct xfif *xfif, struct ofpbuf **bufp)
+{
+    struct ovs_queue *q = find_nonempty_queue(xfif);
+    if (q) {
+        *bufp = queue_pop_head(q);
+        return 0;
+    } else {
+        return EAGAIN;
+    }
+}
+
+static void
+xfif_netdev_recv_wait(struct xfif *xfif)
+{
+    struct ovs_queue *q = find_nonempty_queue(xfif);
+    if (q) {
+        poll_immediate_wake();
+    } else {
+        /* No messages ready to be received, and xf_wait() will ensure that we
+         * wake up to queue new messages, so there is nothing to do. */
+    }
+}
+
+static void
+xf_netdev_flow_used(struct xf_netdev_flow *flow,
+                    const struct xflow_key *key,
+                    const struct ofpbuf *packet)
+{
-     time_timeval(&flow->used);
++    time_timespec(&flow->used);
+    flow->packet_count++;
+    flow->byte_count += packet->size;
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        struct ip_header *nh = packet->l3;
+        flow->ip_tos = nh->ip_tos;
+
+        if (key->nw_proto == IPPROTO_TCP) {
+            struct tcp_header *th = packet->l4;
+            flow->tcp_ctl |= th->tcp_ctl;
+        }
+    }
+}
+
+static void
+xf_netdev_port_input(struct xf_netdev *xf, struct xf_netdev_port *port,
+                     struct ofpbuf *packet)
+{
+    struct xf_netdev_flow *flow;
+    struct xflow_key key;
+    flow_t f;
+
+    if (flow_extract(packet, 0, port->port_no, &f) && xf->drop_frags) {
+        xf->n_frags++;
+        return;
+    }
+    xflow_key_from_flow(&key, &f);
+
+    flow = xf_netdev_lookup_flow(xf, &key);
+    if (flow) {
+        xf_netdev_flow_used(flow, &key, packet);
+        xf_netdev_execute_actions(xf, packet, &key,
+                                  flow->actions, flow->n_actions);
+        xf->n_hit++;
+    } else {
+        xf->n_missed++;
+        xf_netdev_output_control(xf, packet, _XFLOWL_MISS_NR, port->port_no, 0);
+    }
+}
+
+static void
+xf_netdev_run(void)
+{
+    struct ofpbuf packet;
+    struct xf_netdev *xf;
+
+    ofpbuf_init(&packet, XF_NETDEV_HEADROOM + max_mtu);
+    LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+        struct xf_netdev_port *port;
+
+        LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+            int error;
+
+            /* Reset packet contents. */
+            packet.data = (char*)packet.base + XF_NETDEV_HEADROOM;
+            packet.size = 0;
+
+            error = netdev_recv(port->netdev, &packet);
+            if (!error) {
+                xf_netdev_port_input(xf, port, &packet);
+            } else if (error != EAGAIN) {
+                struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+                VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
+                            netdev_get_name(port->netdev), strerror(error));
+            }
+        }
+    }
+    ofpbuf_uninit(&packet);
+}
+
+static void
+xf_netdev_wait(void)
+{
+    struct xf_netdev *xf;
+
+    LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+        struct xf_netdev_port *port;
+        LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+            netdev_recv_wait(port->netdev);
+        }
+    }
+}
+
+
+/* Modify or add a 802.1Q header in 'packet' according to 'a'. */
+static void
+xf_netdev_set_dl_tci(struct ofpbuf *packet, struct xflow_key *key,
+                     const struct xflow_action_dl_tci *a)
+{
+    struct vlan_eth_header *veh;
+
+    if (key->dl_tci) {
+        veh = packet->l2;
+        veh->veth_tci = (veh->veth_tci & ~a->mask) | a->tci;
+    } else {
+        /* Insert new 802.1Q header. */
+        struct eth_header *eh = packet->l2;
+        struct vlan_eth_header tmp;
+        memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN);
+        memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN);
+        tmp.veth_type = htons(ETH_TYPE_VLAN);
+        tmp.veth_tci = htons(a->tci);
+        tmp.veth_next_type = eh->eth_type;
+
+        veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN);
+        memcpy(veh, &tmp, sizeof tmp);
+        packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN;
+    }
+
+    key->dl_tci = veh->veth_tci | htons(XFLOW_TCI_PRESENT);
+}
+
+static void
+xf_netdev_strip_vlan(struct ofpbuf *packet, struct xflow_key *key)
+{
+    struct vlan_eth_header *veh = packet->l2;
+    if (veh->veth_type == htons(ETH_TYPE_VLAN)) {
+        struct eth_header tmp;
+
+        memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN);
+        memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN);
+        tmp.eth_type = veh->veth_next_type;
+
+        packet->size -= VLAN_HEADER_LEN;
+        packet->data = (char*)packet->data + VLAN_HEADER_LEN;
+        packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN;
+        memcpy(packet->data, &tmp, sizeof tmp);
+
+        key->dl_tci = htons(0);
+    }
+}
+
+static void
+xf_netdev_set_dl_src(struct ofpbuf *packet, struct xflow_key *key,
+                     const uint8_t dl_addr[ETH_ADDR_LEN])
+{
+    struct eth_header *eh = packet->l2;
+    memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src);
+    memcpy(key->dl_src, dl_addr, sizeof key->dl_src);
+}
+
+static void
+xf_netdev_set_dl_dst(struct ofpbuf *packet, struct xflow_key *key,
+                     const uint8_t dl_addr[ETH_ADDR_LEN])
+{
+    struct eth_header *eh = packet->l2;
+    memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst);
+    memcpy(key->dl_dst, dl_addr, sizeof key->dl_dst);
+}
+
+static void
+xf_netdev_set_nw_addr(struct ofpbuf *packet, struct xflow_key *key,
+                      const struct xflow_action_nw_addr *a)
+{
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        struct ip_header *nh = packet->l3;
+        uint32_t *field;
+
+        field = a->type == XFLOWAT_SET_NW_SRC ? &nh->ip_src : &nh->ip_dst;
+        if (key->nw_proto == IP_TYPE_TCP) {
+            struct tcp_header *th = packet->l4;
+            th->tcp_csum = recalc_csum32(th->tcp_csum, *field, a->nw_addr);
+        } else if (key->nw_proto == IP_TYPE_UDP) {
+            struct udp_header *uh = packet->l4;
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum32(uh->udp_csum, *field, a->nw_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = 0xffff;
+                }
+            }
+        }
+        nh->ip_csum = recalc_csum32(nh->ip_csum, *field, a->nw_addr);
+        *field = a->nw_addr;
+
+        if (a->type == XFLOWAT_SET_NW_SRC) {
+            key->nw_src = a->nw_addr;
+        } else {
+            key->nw_dst = a->nw_addr;
+        }
+    }
+}
+
+static void
+xf_netdev_set_nw_tos(struct ofpbuf *packet, struct xflow_key *key,
+                     const struct xflow_action_nw_tos *a)
+{
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        struct ip_header *nh = packet->l3;
+        uint8_t *field = &nh->ip_tos;
+
+        /* Set the DSCP bits and preserve the ECN bits. */
+        uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK);
+
+        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field),
+                                    htons((uint16_t)a->nw_tos));
+        *field = new;
+        key->nw_tos = a->nw_tos;
+    }
+}
+
+static void
+xf_netdev_set_tp_port(struct ofpbuf *packet, struct xflow_key *key,
+                      const struct xflow_action_tp_port *a)
+{
+    if (key->dl_type == htons(ETH_TYPE_IP)) {
+        uint16_t *field;
+        if (key->nw_proto == IPPROTO_TCP) {
+            struct tcp_header *th = packet->l4;
+            field = a->type == XFLOWAT_SET_TP_SRC ? &th->tcp_src : &th->tcp_dst;
+            th->tcp_csum = recalc_csum16(th->tcp_csum, *field, a->tp_port);
+            *field = a->tp_port;
+        } else if (key->nw_proto == IPPROTO_UDP) {
+            struct udp_header *uh = packet->l4;
+            field = a->type == XFLOWAT_SET_TP_SRC ? &uh->udp_src : &uh->udp_dst;
+            uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port);
+            *field = a->tp_port;
+        } else {
+            return;
+        }
+
+        if (a->type == XFLOWAT_SET_TP_SRC) {
+            key->tp_src = a->tp_port;
+        } else {
+            key->tp_dst = a->tp_port;
+        }
+    }
+}
+
+static void
+xf_netdev_output_port(struct xf_netdev *xf, struct ofpbuf *packet,
+                      uint16_t out_port)
+{
+    struct xf_netdev_port *p = xf->ports[out_port];
+    if (p) {
+        netdev_send(p->netdev, packet);
+    }
+}
+
+static void
+xf_netdev_output_group(struct xf_netdev *xf, uint16_t group, uint16_t in_port,
+                       struct ofpbuf *packet)
+{
+    struct xflow_port_group *g = &xf->groups[group];
+    int i;
+
+    for (i = 0; i < g->n_ports; i++) {
+        uint16_t out_port = g->ports[i];
+        if (out_port != in_port) {
+            xf_netdev_output_port(xf, packet, out_port);
+        }
+    }
+}
+
+static int
+xf_netdev_output_control(struct xf_netdev *xf, const struct ofpbuf *packet,
+                         int queue_no, int port_no, uint32_t arg)
+{
+    struct ovs_queue *q = &xf->queues[queue_no];
+    struct xflow_msg *header;
+    struct ofpbuf *msg;
+    size_t msg_size;
+
+    if (q->n >= MAX_QUEUE_LEN) {
+        xf->n_lost++;
+        return ENOBUFS;
+    }
+
+    msg_size = sizeof *header + packet->size;
+    msg = ofpbuf_new(msg_size + XFIF_RECV_MSG_PADDING);
+    ofpbuf_reserve(msg, XFIF_RECV_MSG_PADDING);
+    header = ofpbuf_put_uninit(msg, sizeof *header);
+    header->type = queue_no;
+    header->length = msg_size;
+    header->port = port_no;
+    header->arg = arg;
+    ofpbuf_put(msg, packet->data, packet->size);
+    queue_push_tail(q, msg);
+
+    return 0;
+}
+
+static int
+xf_netdev_execute_actions(struct xf_netdev *xf,
+                          struct ofpbuf *packet, struct xflow_key *key,
+                          const union xflow_action *actions, int n_actions)
+{
+    int i;
+    for (i = 0; i < n_actions; i++) {
+        const union xflow_action *a = &actions[i];
+
+        switch (a->type) {
+        case XFLOWAT_OUTPUT:
+            xf_netdev_output_port(xf, packet, a->output.port);
+            break;
+
+        case XFLOWAT_OUTPUT_GROUP:
+            xf_netdev_output_group(xf, a->output_group.group, key->in_port,
+                                   packet);
+            break;
+
+        case XFLOWAT_CONTROLLER:
+            xf_netdev_output_control(xf, packet, _XFLOWL_ACTION_NR,
+                                     key->in_port, a->controller.arg);
+            break;
+
+        case XFLOWAT_SET_DL_TCI:
+            xf_netdev_set_dl_tci(packet, key, &a->dl_tci);
+            break;
+
+        case XFLOWAT_STRIP_VLAN:
+            xf_netdev_strip_vlan(packet, key);
+            break;
+
+        case XFLOWAT_SET_DL_SRC:
+            xf_netdev_set_dl_src(packet, key, a->dl_addr.dl_addr);
+            break;
+
+        case XFLOWAT_SET_DL_DST:
+            xf_netdev_set_dl_dst(packet, key, a->dl_addr.dl_addr);
+            break;
+
+        case XFLOWAT_SET_NW_SRC:
+        case XFLOWAT_SET_NW_DST:
+            xf_netdev_set_nw_addr(packet, key, &a->nw_addr);
+            break;
+
+        case XFLOWAT_SET_NW_TOS:
+            xf_netdev_set_nw_tos(packet, key, &a->nw_tos);
+            break;
+
+        case XFLOWAT_SET_TP_SRC:
+        case XFLOWAT_SET_TP_DST:
+            xf_netdev_set_tp_port(packet, key, &a->tp_port);
+            break;
+        }
+    }
+    return 0;
+}
+
+const struct xfif_class xfif_netdev_class = {
+    "netdev",
+    xf_netdev_run,
+    xf_netdev_wait,
+    NULL,                       /* enumerate */
+    xfif_netdev_open,
+    xfif_netdev_close,
+    NULL,                       /* get_all_names */
+    xfif_netdev_destroy,
+    xfif_netdev_get_stats,
+    xfif_netdev_get_drop_frags,
+    xfif_netdev_set_drop_frags,
+    xfif_netdev_port_add,
+    xfif_netdev_port_del,
+    xfif_netdev_port_query_by_number,
+    xfif_netdev_port_query_by_name,
+    xfif_netdev_port_list,
+    xfif_netdev_port_poll,
+    xfif_netdev_port_poll_wait,
+    xfif_netdev_port_group_get,
+    xfif_netdev_port_group_set,
+    xfif_netdev_flow_get,
+    xfif_netdev_flow_put,
+    xfif_netdev_flow_del,
+    xfif_netdev_flow_flush,
+    xfif_netdev_flow_list,
+    xfif_netdev_execute,
+    xfif_netdev_recv_get_mask,
+    xfif_netdev_recv_set_mask,
+    NULL,                       /* get_sflow_probability */
+    NULL,                       /* set_sflow_probability */
+    xfif_netdev_recv,
+    xfif_netdev_recv_wait,
+};
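Messages queued by xf_netdev_output_control() above are framed as a struct
xflow_msg header followed immediately by the packet bytes, and reach the
client through xfif_netdev_recv().  A sketch of a consumer, assuming only
the header fields assigned above (type, length, port, arg); the function
name is illustrative:

    /* Unpack one message returned by xfif_netdev_recv(). */
    static void
    handle_recv_msg(struct ofpbuf *buf)
    {
        struct xflow_msg *header = buf->data;
        void *payload = header + 1;             /* Packet data follows. */
        size_t payload_len = header->length - sizeof *header;

        if (header->type == _XFLOWL_MISS_NR) {
            /* Flow table miss: 'payload' holds the 'payload_len'-byte
             * packet that arrived on port 'header->port'. */
        }
        (void) payload;
        (void) payload_len;
    }
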
diff --cc lib/xfif-provider.h
index 3b427972f,5cbefebc6..2c7720b5e
--- a/lib/xfif-provider.h
+++ b/lib/xfif-provider.h
@@@ -14,11 -14,13 +14,13 @@@
  * limitations under the License.
  */

-#ifndef DPIF_PROVIDER_H
-#define DPIF_PROVIDER_H 1
+#ifndef XFIF_PROVIDER_H
+#define XFIF_PROVIDER_H 1

-/* Provider interface to dpifs, which provide an interface to an Open vSwitch
+/* Provider interface to xfifs, which provide an interface to an Open vSwitch
- * datapath. */
+ * datapath.  A datapath is a collection of physical or virtual ports that are
+ * exposed over OpenFlow as a single switch.  Datapaths and the collections of
+ * ports that they contain may be fixed or dynamic. */

 #include
 #include "openflow/openflow.h"
diff --cc ofproto/discovery.c
index fb69f347f,84f092e69..1a9bb58de
--- a/ofproto/discovery.c
+++ b/ofproto/discovery.c
@@@ -106,6 -106,8 +107,8 @@@ discovery_create(const char *re, bool u
      d = xzalloc(sizeof *d);

-     d->dpif_name = xstrdup(dpif_base_name(dpif));
++    d->dpif_name = xstrdup(wdp_base_name(wdp));
+
      /* Controller regular expression. */
      error = discovery_set_accept_controller_re(d, re);
      if (error) {
@@@ -114,16 -116,18 +117,18 @@@
      d->update_resolv_conf = update_resolv_conf;

      /* Initialize DHCP client. */
-     error = dpif_port_get_name(dpif, ODPP_LOCAL,
-                                local_name, sizeof local_name);
+     error = wdp_port_get_name(wdp, OFPP_LOCAL, &local_name);
      if (error) {
-         VLOG_ERR("failed to query datapath local port: %s", strerror(error));
+         VLOG_ERR("%s: failed to query datapath local port: %s",
+                  d->dpif_name, strerror(error));
          goto error_regfree;
      }
      error = dhclient_create(local_name, modify_dhcp_request,
                              validate_dhcp_offer, d, &d->dhcp);
+     free(local_name);
      if (error) {
-         VLOG_ERR("failed to initialize DHCP client: %s", strerror(error));
+         VLOG_ERR("%s: failed to initialize DHCP client: %s",
+                  d->dpif_name, strerror(error));
          goto error_regfree;
      }
      dhclient_set_max_timeout(d->dhcp, 3);
diff --cc ofproto/fail-open.c
index 352ce11c4,a86ee391b..badf5ec06
--- a/ofproto/fail-open.c
+++ b/ofproto/fail-open.c
@@@ -20,6 -20,8 +20,7 @@@
 #include
 #include "flow.h"
 #include "mac-learning.h"
-#include "odp-util.h"
+ #include "ofp-util.h"
 #include "ofpbuf.h"
 #include "ofproto.h"
 #include "pktbuf.h"
diff --cc ofproto/ofproto-sflow.c
index e643f84a0,cc6a6935a..a7901ce8d
--- a/ofproto/ofproto-sflow.c
+++ b/ofproto/ofproto-sflow.c
@@@ -356,7 -354,7 +356,7 @@@ ofproto_sflow_del_port(struct ofproto_s
      }
      netdev_close(osp->netdev);
      free(osp);
-     port_array_set(&os->ports, xflow_port, NULL);
-     port_array_delete(&os->ports, odp_port);
++    port_array_delete(&os->ports, xflow_port);
  }
 }
diff --cc ofproto/ofproto.c
index e64304278,4c4df9493..2b0195cc4
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@@ -33,7 -34,9 +33,8 @@@
 #include "mac-learning.h"
 #include "netdev.h"
 #include "netflow.h"
-#include "odp-util.h"
 #include "ofp-print.h"
+ #include "ofp-util.h"
 #include "ofproto-sflow.h"
 #include "ofpbuf.h"
 #include "openflow/nicira-ext.h"
@@@ -1191,9 -1713,85 +1204,21 @@@ ofconn_receives_async_msgs(const struc
      return ofconn->miss_send_len > 0;
  }
 }
+
+ /* Returns a human-readable name for an OpenFlow connection between 'ofproto'
+  * and 'target', suitable for use in log messages for identifying the
+  * connection.
+  *
+  * The name is dynamically allocated.  The caller should free it (with free())
+  * when it is no longer needed. */
+ static char *
+ ofconn_make_name(const struct ofproto *ofproto, const char *target)
+ {
-     return xasprintf("%s<->%s", dpif_base_name(ofproto->dpif), target);
++    return xasprintf("%s<->%s", wdp_base_name(ofproto->wdp), target);
+ }

-/* Caller is responsible for initializing the 'cr' member of the returned
- * rule. */
-static struct rule *
-rule_create(struct ofproto *ofproto, struct rule *super,
-            const union ofp_action *actions, size_t n_actions,
-            uint16_t idle_timeout, uint16_t hard_timeout,
-            uint64_t flow_cookie, bool send_flow_removed)
-{
-    struct rule *rule = xzalloc(sizeof *rule);
-    rule->idle_timeout = idle_timeout;
-    rule->hard_timeout = hard_timeout;
-    rule->flow_cookie = flow_cookie;
-    rule->used = rule->created = time_msec();
-    rule->send_flow_removed = send_flow_removed;
-    rule->super = super;
-    if (super) {
-        list_push_back(&super->list, &rule->list);
-    } else {
-        list_init(&rule->list);
-    }
-    rule->n_actions = n_actions;
-    rule->actions = xmemdup(actions, n_actions * sizeof *actions);
-    netflow_flow_clear(&rule->nf_flow);
-    netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created);
-
-    return rule;
-}
-
-static struct rule *
-rule_from_cls_rule(const struct cls_rule *cls_rule)
-{
-    return cls_rule ? CONTAINER_OF(cls_rule, struct rule, cr) : NULL;
-}
-
-static void
-rule_free(struct rule *rule)
-{
-    free(rule->actions);
-    free(rule->odp_actions);
-    free(rule);
-}
-
-/* Destroys 'rule'.  If 'rule' is a subrule, also removes it from its
- * super-rule's list of subrules.  If 'rule' is a super-rule, also iterates
- * through all of its subrules and revalidates them, destroying any that no
- * longer has a super-rule (which is probably all of them).
- *
- * Before calling this function, the caller must make have removed 'rule' from
- * the classifier.  If 'rule' is an exact-match rule, the caller is also
- * responsible for ensuring that it has been uninstalled from the datapath. */
-static void
-rule_destroy(struct ofproto *ofproto, struct rule *rule)
-{
-    if (!rule->super) {
-        struct rule *subrule, *next;
-        LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) {
-            revalidate_rule(ofproto, subrule);
-        }
-    } else {
-        list_remove(&rule->list);
-    }
-    rule_free(rule);
-}
-
 static bool
-rule_has_out_port(const struct rule *rule, uint16_t out_port)
+rule_has_out_port(const struct wdp_rule *rule, uint16_t out_port)
 {
     const union ofp_action *oa;
     struct actions_iterator i;
@@@ -1820,6 -3191,95 +1845,105 @@@ handle_aggregate_stats_request(struct o
      return 0;
  }

+ struct queue_stats_cbdata {
+     struct ofconn *ofconn;
+     struct ofpbuf *msg;
+     uint16_t port_no;
+ };
+
+ static void
+ put_queue_stats(struct queue_stats_cbdata *cbdata, uint32_t queue_id,
+                 const struct netdev_queue_stats *stats)
+ {
+     struct ofp_queue_stats *reply;
+
+     reply = append_stats_reply(sizeof *reply, cbdata->ofconn, &cbdata->msg);
+     reply->port_no = htons(cbdata->port_no);
+     memset(reply->pad, 0, sizeof reply->pad);
+     reply->queue_id = htonl(queue_id);
+     reply->tx_bytes = htonll(stats->tx_bytes);
+     reply->tx_packets = htonll(stats->tx_packets);
+     reply->tx_errors = htonll(stats->tx_errors);
+ }
+
+ static void
+ handle_queue_stats_dump_cb(uint32_t queue_id,
+                            struct netdev_queue_stats *stats,
+                            void *cbdata_)
+ {
+     struct queue_stats_cbdata *cbdata = cbdata_;
+
+     put_queue_stats(cbdata, queue_id, stats);
+ }
+
+ static void
-handle_queue_stats_for_port(struct ofport *port, uint16_t port_no,
-                            uint32_t queue_id,
++handle_queue_stats_for_port(struct wdp_port *port, uint32_t queue_id,
+                             struct queue_stats_cbdata *cbdata)
+ {
-     cbdata->port_no = port_no;
++    cbdata->port_no = port->opp.port_no;
+     if (queue_id == OFPQ_ALL) {
+         netdev_dump_queue_stats(port->netdev,
+                                 handle_queue_stats_dump_cb, cbdata);
+     } else {
+         struct netdev_queue_stats stats;
+
+         netdev_get_queue_stats(port->netdev, queue_id, &stats);
+         put_queue_stats(cbdata, queue_id, &stats);
+     }
+ }
+
+ static int
+ handle_queue_stats_request(struct ofproto *ofproto, struct ofconn *ofconn,
+                            const struct ofp_stats_request *osr,
+                            size_t arg_size)
+ {
+     struct ofp_queue_stats_request *qsr;
+     struct queue_stats_cbdata cbdata;
-     struct ofport *port;
+     unsigned int port_no;
+     uint32_t queue_id;
+
+     if (arg_size != sizeof *qsr) {
+         return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
+     }
+     qsr = (struct ofp_queue_stats_request *) osr->body;
+
+     COVERAGE_INC(ofproto_queue_req);
+
+     cbdata.ofconn = ofconn;
+     cbdata.msg = start_stats_reply(osr, 128);
+
+     port_no = ntohs(qsr->port_no);
+     queue_id = ntohl(qsr->queue_id);
+     if (port_no == OFPP_ALL) {
-         PORT_ARRAY_FOR_EACH (port, &ofproto->ports, port_no) {
-             handle_queue_stats_for_port(port, port_no, queue_id, &cbdata);
++        struct wdp_port *ports;
++        size_t n_ports, i;
++
++        wdp_port_list(ofproto->wdp, &ports, &n_ports);
++        /* XXX deal with wdp_port_list() errors */
++        for (i = 0; i < n_ports; i++) {
++            handle_queue_stats_for_port(&ports[i], queue_id, &cbdata);
+         }
++        wdp_port_array_free(ports, n_ports);
+     } else if (port_no < ofproto->max_ports) {
-         port = port_array_get(&ofproto->ports, port_no);
-         if (port) {
-             handle_queue_stats_for_port(port, port_no, queue_id, &cbdata);
++        struct wdp_port port;
++        int error;
++
++        error = wdp_port_query_by_number(ofproto->wdp, port_no, &port);
++        if (!error) {
++            handle_queue_stats_for_port(&port, queue_id, &cbdata);
++        } else {
++            /* XXX deal with wdp_port_query_by_number() errors */
+         }
++        wdp_port_free(&port);
+     } else {
+         ofpbuf_delete(cbdata.msg);
+         return ofp_mkerr(OFPET_QUEUE_OP_FAILED, OFPQOFC_BAD_PORT);
+     }
+     queue_tx(cbdata.msg, ofconn, ofconn->reply_counter);
+
+     return 0;
+ }
+
 static int
 handle_stats_request(struct ofproto *p, struct ofconn *ofconn,
                      struct ofp_header *oh)
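netdev_dump_queue_stats() above invokes its callback once per queue on the
netdev, so the same shape works for any aggregation over queues.  A sketch
under that assumption (struct queue_totals and the callback name are
illustrative only):

    struct queue_totals {
        unsigned int n_queues;
        uint64_t tx_bytes;
    };

    /* Callback for netdev_dump_queue_stats(): accumulate totals in 'aux'. */
    static void
    sum_queue_stats_cb(uint32_t queue_id OVS_UNUSED,
                       struct netdev_queue_stats *stats, void *aux)
    {
        struct queue_totals *totals = aux;

        totals->n_queues++;
        totals->tx_bytes += stats->tx_bytes;
    }

Usage would be netdev_dump_queue_stats(netdev, sum_queue_stats_cb, &totals);
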
diff --cc ofproto/pinsched.c
index f6a453aef,d749ee4e6..1446cfbfa
--- a/ofproto/pinsched.c
+++ b/ofproto/pinsched.c
@@@ -69,17 -63,14 +69,17 @@@ struct pinsched
      struct status_category *ss_cat;
  };

-static struct ofpbuf *
-dequeue_packet(struct pinsched *ps, struct ovs_queue *q,
+static struct wdp_packet *
+dequeue_packet(struct pinsched *ps, struct wdp_packet_queue *q,
                unsigned int port_no)
 {
-    struct ofpbuf *packet = queue_pop_head(q);
-    if (!q->n) {
+    struct wdp_packet *packet;
+
+    packet = CONTAINER_OF(list_pop_front(&q->list), struct wdp_packet, list);
+    q->n--;
+    if (list_is_empty(&q->list)) {
         free(q);
-        port_array_set(&ps->queues, port_no, NULL);
+        port_array_delete(&ps->queues, port_no);
     }
     ps->n_queued--;
     return packet;
diff --cc ofproto/pktbuf.c
index e4c5c7211,b04eb5955..11abceb1a
--- a/ofproto/pktbuf.c
+++ b/ofproto/pktbuf.c
@@@ -19,8 -19,8 +19,9 @@@
 #include
 #include
 #include "coverage.h"
+ #include "ofp-util.h"
 #include "ofpbuf.h"
+#include "openflow/openflow.h"
 #include "timeval.h"
 #include "util.h"
 #include "vconn.h"
diff --cc ofproto/wdp-xflow.c
index 81613a298,000000000..66b245a77
mode 100644,000000..100644
--- a/ofproto/wdp-xflow.c
+++ b/ofproto/wdp-xflow.c
@@@ -1,2286 -1,0 +1,2334 @@@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "wdp-xflow.h"
+
+#include
+#include
+
+#include "coverage.h"
+#include "dhcp.h"
+#include "netdev.h"
+#include "netflow.h"
++#include "ofp-util.h"
+#include "ofpbuf.h"
+#include "openflow/nicira-ext.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "port-array.h"
+#include "shash.h"
+#include "stp.h"
+#include "svec.h"
+#include "timeval.h"
+#include "util.h"
+#include "vconn.h"
+#include "wdp-provider.h"
+#include "xfif.h"
+#include "xflow-util.h"
+#include "xtoxll.h"
+
++#include                      /* XXX */
++#include                      /* XXX */
++
+#define THIS_MODULE VLM_wdp_xflow
+#include "vlog.h"
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+/* Maximum numbers of rules. */
+#define WX_MAX_WILD     65536   /* Wildcarded rules. */
+#define WX_MAX_EXACT    1048576 /* Exact-match rules. */
+
+struct wx {
+    struct list list_node;
+    struct wdp wdp;
+    struct xfif *xfif;
+    struct classifier cls;
+    struct netdev_monitor *netdev_monitor;
-     struct port_array ports;    /* Index is ODP port nr; wdp_port->opp.port_no
-                                  * is OFP port nr. */
++    struct port_array ports;    /* Index is xflow port nr;
++                                 * wdp_port->opp.port_no is OFP port nr. */
+    struct shash port_by_name;
+    bool need_revalidate;
+    long long int next_expiration;
+};
+
+static struct list all_wx = LIST_INITIALIZER(&all_wx);
+
+static int wx_port_init(struct wx *);
+static void wx_port_run(struct wx *);
+static void wx_port_refresh_groups(struct wx *);
+
+enum {
+    WX_GROUP_FLOOD = 0,
+    WX_GROUP_ALL = 1
+};
+
+static struct wx *
+wx_cast(const struct wdp *wdp)
+{
+    return CONTAINER_OF(wdp, struct wx, wdp);
+}
+
+static int
+wx_xlate_actions(struct wx *, const union ofp_action *, size_t n,
+                 const flow_t *flow, const struct ofpbuf *packet,
+                 struct xflow_actions *out, bool *may_set_up_flow);
+
+struct wx_rule {
+    struct wdp_rule wr;
+
+    uint64_t packet_count;      /* Number of packets received. */
+    uint64_t byte_count;        /* Number of bytes received. */
+    uint64_t accounted_bytes;   /* Number of bytes passed to account_cb. */
+    long long int used;         /* Last-used time (0 if never used). */
+
+    /* If 'super' is non-NULL, this rule is a subrule, that is, it is an
+     * exact-match rule (having cr.wc.wildcards of 0) generated from the
+     * wildcard rule 'super'.  In this case, 'list' is an element of the
+     * super-rule's list.
+     *
+     * If 'super' is NULL, this rule is a super-rule, and 'list' is the head of
+     * a list of subrules.  A super-rule with no wildcards (where
+     * cr.wc.wildcards is 0) will never have any subrules. */
+    struct wx_rule *super;
+    struct list list;
+
+    /* Datapath actions.
+     *
-      * A super-rule with wildcard fields never has XFLOW actions (since the
++    * A super-rule with wildcard fields never has xflow actions (since the
+     * datapath only supports exact-match flows). */
+    bool installed;             /* Installed in datapath? */
+    bool may_install;           /* True ordinarily; false if actions must
+                                 * be reassessed for every packet. */
+    int n_xflow_actions;
+    union xflow_action *xflow_actions;
+};
+
+static void wx_rule_destroy(struct wx *, struct wx_rule *);
+static void wx_rule_update_actions(struct wx *, struct wx_rule *);
+static void wx_rule_execute(struct wx *, struct wx_rule *,
+                            struct ofpbuf *packet, const flow_t *);
+static bool wx_rule_make_actions(struct wx *, struct wx_rule *,
+                                 const struct ofpbuf *packet);
+static void wx_rule_install(struct wx *, struct wx_rule *,
+                            struct wx_rule *displaced_rule);
+
+static struct wx_rule *
+wx_rule_cast(const struct cls_rule *cls_rule)
+{
+    return cls_rule ? CONTAINER_OF(cls_rule, struct wx_rule, wr.cr) : NULL;
+}
+
+/* Returns true if 'rule' is merely an implementation detail that should be
+ * hidden from the client. */
+static inline bool
+wx_rule_is_hidden(const struct wx_rule *rule)
+{
+    return rule->super != NULL;
+}
+
+static void
+wx_rule_free(struct wx_rule *rule)
+{
+    wdp_rule_uninit(&rule->wr);
+    free(rule->xflow_actions);
+    free(rule);
+}
+
+static void
+wx_rule_account(struct wx *wx OVS_UNUSED, struct wx_rule *rule OVS_UNUSED,
+                uint64_t extra_bytes OVS_UNUSED)
+{
+    /* XXX call account_cb hook */
+}
+
+static void
+wx_rule_post_uninstall(struct wx *wx, struct wx_rule *rule)
+{
+    struct wx_rule *super = rule->super;
+
+    wx_rule_account(wx, rule, 0);
+
+    /* XXX netflow expiration */
+
+    if (super) {
+        super->packet_count += rule->packet_count;
+        super->byte_count += rule->byte_count;
+
+        /* Reset counters to prevent double counting if the rule ever gets
+         * reinstalled. */
+        rule->packet_count = 0;
+        rule->byte_count = 0;
+        rule->accounted_bytes = 0;
+
+        //XXX netflow_flow_clear(&rule->nf_flow);
+    }
+}
+
+static long long int
+xflow_flow_stats_to_msec(const struct xflow_flow_stats *stats)
+{
+    return (stats->used_sec
+            ? stats->used_sec * 1000 + stats->used_nsec / 1000000
+            : 0);
+}
+
+static void
+wx_rule_update_time(struct wx *wx OVS_UNUSED, struct wx_rule *rule,
+                    const struct xflow_flow_stats *stats)
+{
+    long long int used = xflow_flow_stats_to_msec(stats);
+    if (used > rule->used) {
+        rule->used = used;
+        if (rule->super && used > rule->super->used) {
+            rule->super->used = used;
+        }
+        //XXX netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used);
+    }
+}
+
+static void
+wx_rule_update_stats(struct wx *wx, struct wx_rule *rule,
+                     const struct xflow_flow_stats *stats)
+{
+    if (stats->n_packets) {
+        wx_rule_update_time(wx, rule, stats);
+        rule->packet_count += stats->n_packets;
+        rule->byte_count += stats->n_bytes;
+        /* XXX netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos,
+           stats->tcp_flags); */
+    }
+}
+
+static void
+wx_rule_uninstall(struct wx *wx, struct wx_rule *rule)
+{
+    assert(!rule->wr.cr.flow.wildcards);
+    if (rule->installed) {
+        struct xflow_flow xflow_flow;
+
+        xflow_key_from_flow(&xflow_flow.key, &rule->wr.cr.flow);
+        xflow_flow.actions = NULL;
+        xflow_flow.n_actions = 0;
+        xflow_flow.flags = 0;
+        if (!xfif_flow_del(wx->xfif, &xflow_flow)) {
+            wx_rule_update_stats(wx, rule, &xflow_flow.stats);
+        }
+        rule->installed = false;
+
+        wx_rule_post_uninstall(wx, rule);
+    }
+}
+
+#if 0
+static bool
+is_controller_rule(struct wx_rule *rule)
+{
+    /* If the only action is send to the controller then don't report
+     * NetFlow expiration messages since it is just part of the control
+     * logic for the network and not real traffic. */
+
 */
+
-    if (rule && rule->super) {
-        struct wdp_rule *super = &rule->super->wr;
-
-        return super->n_actions == 1 &&
-            super->actions[0].type == htons(OFPAT_OUTPUT) &&
-            super->actions[0].output.port == htons(OFPP_CONTROLLER);
-    }
-
-    return false;
++    return (rule
++            && rule->super
++            && rule->super->wr.n_actions == 1
++            && action_outputs_to_port(&rule->super->wr.actions[0],
++                                      htons(OFPP_CONTROLLER)));
+}
+#endif
+
+static void
+wx_rule_remove(struct wx *wx, struct wx_rule *rule)
+{
+    if (rule->wr.cr.flow.wildcards) {
+        COVERAGE_INC(wx_del_wc_flow);
+        wx->need_revalidate = true;
+    } else {
+        wx_rule_uninstall(wx, rule);
+    }
+    classifier_remove(&wx->cls, &rule->wr.cr);
+    wx_rule_destroy(wx, rule);
+}
+
+static bool
+wx_rule_revalidate(struct wx *wx, struct wx_rule *rule)
+{
+    const flow_t *flow = &rule->wr.cr.flow;
+
+    COVERAGE_INC(wx_rule_revalidate);
+    if (rule->super) {
+        struct wx_rule *super;
+        super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow));
+        if (!super) {
+            wx_rule_remove(wx, rule);
+            return false;
+        } else if (super != rule->super) {
+            COVERAGE_INC(wx_revalidate_moved);
+            list_remove(&rule->list);
+            list_push_back(&super->list, &rule->list);
+            rule->super = super;
+            rule->wr.hard_timeout = super->wr.hard_timeout;
+            rule->wr.idle_timeout = super->wr.idle_timeout;
+            rule->wr.created = super->wr.created;
+            rule->used = 0;
+        }
+    }
+
+    wx_rule_update_actions(wx, rule);
+    return true;
+}
+
+/* Destroys 'rule'. If 'rule' is a subrule, also removes it from its
+ * super-rule's list of subrules. If 'rule' is a super-rule, also iterates
+ * through all of its subrules and revalidates them, destroying any that no
+ * longer have a super-rule (which is probably all of them).
+ *
+ * Before calling this function, the caller must have removed 'rule' from
+ * the classifier. If 'rule' is an exact-match rule, the caller is also
+ * responsible for ensuring that it has been uninstalled from the datapath. */
+static void
+wx_rule_destroy(struct wx *wx, struct wx_rule *rule)
+{
+    if (!rule->super) {
+        struct wx_rule *subrule, *next;
+        LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) {
+            wx_rule_revalidate(wx, subrule);
+        }
+    } else {
+        list_remove(&rule->list);
+    }
+    wx_rule_free(rule);
+}
+
+#if 0
+static bool
+wx_rule_has_out_port(const struct wx_rule *rule, uint16_t out_port)
+{
+    const union ofp_action *oa;
+    struct actions_iterator i;
+
+    if (out_port == htons(OFPP_NONE)) {
+        return true;
+    }
+    for (oa = actions_first(&i, rule->wr.actions, rule->wr.n_actions);
+         oa;
+         oa = actions_next(&i)) {
+        if (oa->type == htons(OFPAT_OUTPUT) && oa->output.port == out_port) {
+            return true;
+        }
+    }
+    return false;
+}
+#endif
+
+/* Caller is responsible for initializing the 'cr' member of the returned
+ * rule.
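+ *
+ * For example, wx_flow_put() below pairs this function with
+ * cls_rule_from_flow():
+ *
+ *     rule = wx_rule_create(NULL, put->actions, put->n_actions,
+ *                           put->idle_timeout, put->hard_timeout);
+ *     cls_rule_from_flow(put->flow, &rule->wr.cr);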
*/ +static struct wx_rule * +wx_rule_create(struct wx_rule *super, + const union ofp_action *actions, size_t n_actions, + uint16_t idle_timeout, uint16_t hard_timeout) +{ + struct wx_rule *rule = xzalloc(sizeof *rule); + wdp_rule_init(&rule->wr, actions, n_actions); + rule->wr.idle_timeout = idle_timeout; + rule->wr.hard_timeout = hard_timeout; + rule->used = rule->wr.created; + rule->super = super; + if (super) { + list_push_back(&super->list, &rule->list); + } else { + list_init(&rule->list); + } +#if 0 + netflow_flow_clear(&rule->nf_flow); + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created); +#endif + + return rule; +} + +/* Executes the actions indicated by 'rule' on 'packet', which is in flow - * 'flow' and is considered to have arrived on XFLOW port 'in_port'. ++ * 'flow' and is considered to have arrived on xflow port 'in_port'. + * + * The flow that 'packet' actually contains does not need to actually match + * 'rule'; the actions in 'rule' will be applied to it either way. Likewise, + * the packet and byte counters for 'rule' will be credited for the packet sent + * out whether or not the packet actually matches 'rule'. + * + * If 'rule' is an exact-match rule and 'flow' actually equals the rule's flow, - * the caller must already have accurately composed XFLOW actions for it given ++ * the caller must already have accurately composed xflow actions for it given + * 'packet' using rule_make_actions(). If 'rule' is a wildcard rule, or if + * 'rule' is an exact-match rule but 'flow' is not the rule's flow, then this - * function will compose a set of XFLOW actions based on 'rule''s OpenFlow ++ * function will compose a set of xflow actions based on 'rule''s OpenFlow + * actions and apply them to 'packet'. */ +static void +wx_rule_execute(struct wx *wx, struct wx_rule *rule, + struct ofpbuf *packet, const flow_t *flow) +{ + const union xflow_action *actions; + size_t n_actions; + struct xflow_actions a; + - /* Grab or compose the XFLOW actions. ++ /* Grab or compose the xflow actions. + * + * The special case for an exact-match 'rule' where 'flow' is not the + * rule's flow is important to avoid, e.g., sending a packet out its input - * port simply because the XFLOW actions were composed for the wrong ++ * port simply because the xflow actions were composed for the wrong + * scenario. */ + if (rule->wr.cr.flow.wildcards + || !flow_equal(flow, &rule->wr.cr.flow)) + { + struct wx_rule *super = rule->super ? rule->super : rule; + if (wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, flow, + packet, &a, NULL)) { + return; + } + actions = a.actions; + n_actions = a.n_actions; + } else { + actions = rule->xflow_actions; + n_actions = rule->n_xflow_actions; + } + - /* Execute the XFLOW actions. */ ++ /* Execute the xflow actions. */ + if (!xfif_execute(wx->xfif, flow->in_port, + actions, n_actions, packet)) { + struct xflow_flow_stats stats; + flow_extract_stats(flow, packet, &stats); + wx_rule_update_stats(wx, rule, &stats); + rule->used = time_msec(); + //XXX netflow_flow_update_time(wx->netflow, &rule->nf_flow, rule->used); + } +} + +static void +wx_rule_insert(struct wx *wx, struct wx_rule *rule, struct ofpbuf *packet, + uint16_t in_port) +{ + struct wx_rule *displaced_rule; + + /* Insert the rule in the classifier. */ + displaced_rule = wx_rule_cast(classifier_insert(&wx->cls, &rule->wr.cr)); + if (!rule->wr.cr.flow.wildcards) { + wx_rule_make_actions(wx, rule, packet); + } + + /* Send the packet and credit it to the rule. 
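+     *
+     * flow_extract() recomputes the flow from 'packet' itself instead of
+     * reusing the rule's flow: per the comment on wx_rule_execute() above,
+     * the actions are applied to 'packet' even when the packet does not
+     * actually match 'rule'.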
*/ + if (packet) { + flow_t flow; + flow_extract(packet, 0, in_port, &flow); + wx_rule_execute(wx, rule, packet, &flow); + } + + /* Install the rule in the datapath only after sending the packet, to + * avoid packet reordering. */ + if (rule->wr.cr.flow.wildcards) { + COVERAGE_INC(wx_add_wc_flow); + wx->need_revalidate = true; + } else { + wx_rule_install(wx, rule, displaced_rule); + } + + /* Free the rule that was displaced, if any. */ + if (displaced_rule) { + rule->wr.client_data = displaced_rule->wr.client_data; + wx_rule_destroy(wx, displaced_rule); + } +} + +static struct wx_rule * +wx_rule_create_subrule(struct wx *wx, struct wx_rule *rule, const flow_t *flow) +{ + struct wx_rule *subrule; + + subrule = wx_rule_create(rule, NULL, 0, + rule->wr.idle_timeout, + rule->wr.hard_timeout); + COVERAGE_INC(wx_subrule_create); + cls_rule_from_flow(flow, &subrule->wr.cr); + classifier_insert_exact(&wx->cls, &subrule->wr.cr); + + return subrule; +} + +/* Returns true if the actions changed, false otherwise. */ +static bool +wx_rule_make_actions(struct wx *wx, struct wx_rule *rule, + const struct ofpbuf *packet) +{ + const struct wx_rule *super; + struct xflow_actions a; + size_t actions_len; + + assert(!rule->wr.cr.flow.wildcards); + + super = rule->super ? rule->super : rule; + wx_xlate_actions(wx, super->wr.actions, super->wr.n_actions, + &rule->wr.cr.flow, packet, &a, &rule->may_install); + + actions_len = a.n_actions * sizeof *a.actions; + if (rule->n_xflow_actions != a.n_actions + || memcmp(rule->xflow_actions, a.actions, actions_len)) { + COVERAGE_INC(wx_xflow_unchanged); + free(rule->xflow_actions); + rule->n_xflow_actions = a.n_actions; + rule->xflow_actions = xmemdup(a.actions, actions_len); + return true; + } else { + return false; + } +} + +static int +do_put_flow(struct wx *wx, struct wx_rule *rule, int flags, + struct xflow_flow_put *put) +{ + memset(&put->flow.stats, 0, sizeof put->flow.stats); + xflow_key_from_flow(&put->flow.key, &rule->wr.cr.flow); + put->flow.actions = rule->xflow_actions; + put->flow.n_actions = rule->n_xflow_actions; + put->flow.flags = 0; + put->flags = flags; + return xfif_flow_put(wx->xfif, put); +} + +static void +wx_rule_install(struct wx *wx, struct wx_rule *rule, struct wx_rule *displaced_rule) +{ + assert(!rule->wr.cr.flow.wildcards); + + if (rule->may_install) { + struct xflow_flow_put put; + if (!do_put_flow(wx, rule, + XFLOWPF_CREATE | XFLOWPF_MODIFY | XFLOWPF_ZERO_STATS, + &put)) { + rule->installed = true; + if (displaced_rule) { + wx_rule_update_stats(wx, displaced_rule, &put.flow.stats); + wx_rule_post_uninstall(wx, displaced_rule); + } + } + } else if (displaced_rule) { + wx_rule_uninstall(wx, displaced_rule); + } +} + +static void +wx_rule_reinstall(struct wx *wx, struct wx_rule *rule) +{ + if (rule->installed) { + struct xflow_flow_put put; + COVERAGE_INC(wx_dp_missed); + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY, &put); + } else { + wx_rule_install(wx, rule, NULL); + } +} + +static void +wx_rule_update_actions(struct wx *wx, struct wx_rule *rule) +{ + bool actions_changed; +#if 0 + uint16_t new_out_iface, old_out_iface; + + old_out_iface = rule->nf_flow.output_iface; +#endif + actions_changed = wx_rule_make_actions(wx, rule, NULL); + + if (rule->may_install) { + if (rule->installed) { + if (actions_changed) { + struct xflow_flow_put put; + do_put_flow(wx, rule, XFLOWPF_CREATE | XFLOWPF_MODIFY + | XFLOWPF_ZERO_STATS, &put); + wx_rule_update_stats(wx, rule, &put.flow.stats); +#if 0 + /* Temporarily set the old output iface so 
that NetFlow + * messages have the correct output interface for the old + * stats. */ + new_out_iface = rule->nf_flow.output_iface; + rule->nf_flow.output_iface = old_out_iface; +#endif + wx_rule_post_uninstall(wx, rule); + //rule->nf_flow.output_iface = new_out_iface; + } + } else { + wx_rule_install(wx, rule, NULL); + } + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +add_output_group_action(struct xflow_actions *actions, uint16_t group, + uint16_t *nf_output_iface) +{ + xflow_actions_add(actions, XFLOWAT_OUTPUT_GROUP)->output_group.group = group; + + if (group == WX_GROUP_ALL || group == WX_GROUP_FLOOD) { + *nf_output_iface = NF_OUT_FLOOD; + } +} + +static void +add_controller_action(struct xflow_actions *actions, + const struct ofp_action_output *oao) +{ + union xflow_action *a = xflow_actions_add(actions, XFLOWAT_CONTROLLER); + a->controller.arg = ntohs(oao->max_len); +} + +struct wx_xlate_ctx { + /* Input. */ + flow_t flow; /* Flow to which these actions correspond. */ + int recurse; /* Recursion level, via xlate_table_action. */ + struct wx *wx; + const struct ofpbuf *packet; /* The packet corresponding to 'flow', or a + * null pointer if we are revalidating + * without a packet to refer to. */ + + /* Output. */ + struct xflow_actions *out; /* Datapath actions. */ + //tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ + bool may_set_up_flow; /* True ordinarily; false if the actions must + * be reassessed for every packet. */ + uint16_t nf_output_iface; /* Output interface index for NetFlow. */ +}; + +static void do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx); + +static void +add_output_action(struct wx_xlate_ctx *ctx, uint16_t port) +{ + const struct wdp_port *wdp_port = port_array_get(&ctx->wx->ports, port); + + if (wdp_port) { + if (wdp_port->opp.config & OFPPC_NO_FWD) { + /* Forwarding disabled on port. */ + return; + } + } else { + /* + * We don't have an ofport record for this port, but it doesn't hurt to + * allow forwarding to it anyhow. Maybe such a port will appear later + * and we're pre-populating the flow table. + */ + } + + xflow_actions_add(ctx->out, XFLOWAT_OUTPUT)->output.port = port; + //ctx->nf_output_iface = port; +} + +static struct wx_rule * +wx_rule_lookup_valid(struct wx *wx, const flow_t *flow) +{ + struct wx_rule *rule = wx_rule_cast(classifier_lookup(&wx->cls, flow)); + + /* The rule we found might not be valid, since we could be in need of + * revalidation. If it is not valid, don't return it. */ + if (rule + && rule->super + && wx->need_revalidate + && !wx_rule_revalidate(wx, rule)) { + COVERAGE_INC(wx_invalidated); + return NULL; + } + + return rule; +} + +static void +xlate_table_action(struct wx_xlate_ctx *ctx, uint16_t in_port) +{ + if (!ctx->recurse) { + uint16_t old_in_port; + struct wx_rule *rule; + + /* Look up a flow with 'in_port' as the input port. Then restore the + * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will + * have surprising behavior). 
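+         *
+         * For example, if a packet that arrived on port 1 is resubmitted with
+         * an 'in_port' of 2 and the rule found outputs to OFPP_IN_PORT, the
+         * packet should still go back out port 1, not port 2.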
*/ + old_in_port = ctx->flow.in_port; + ctx->flow.in_port = in_port; + rule = wx_rule_lookup_valid(ctx->wx, &ctx->flow); + ctx->flow.in_port = old_in_port; + + if (rule) { + if (rule->super) { + rule = rule->super; + } + + ctx->recurse++; + do_xlate_actions(rule->wr.actions, rule->wr.n_actions, ctx); + ctx->recurse--; + } + } +} + +static void +xlate_output_action(struct wx_xlate_ctx *ctx, + const struct ofp_action_output *oao) +{ + uint16_t xflow_port; + uint16_t prev_nf_output_iface = ctx->nf_output_iface; + + ctx->nf_output_iface = NF_OUT_DROP; + + switch (ntohs(oao->port)) { + case OFPP_IN_PORT: + add_output_action(ctx, ctx->flow.in_port); + break; + case OFPP_TABLE: + xlate_table_action(ctx, ctx->flow.in_port); + break; + case OFPP_NORMAL: +#if 0 + if (!ctx->wx->ofhooks->normal_cb(ctx->flow, ctx->packet, + ctx->out, ctx->tags, + &ctx->nf_output_iface, + ctx->wx->aux)) { + COVERAGE_INC(wx_uninstallable); + ctx->may_set_up_flow = false; + } + break; +#else + /* fall through to flood for now */ +#endif + case OFPP_FLOOD: + add_output_group_action(ctx->out, WX_GROUP_FLOOD, + &ctx->nf_output_iface); + break; + case OFPP_ALL: + add_output_group_action(ctx->out, WX_GROUP_ALL, &ctx->nf_output_iface); + break; + case OFPP_CONTROLLER: + add_controller_action(ctx->out, oao); + break; + case OFPP_LOCAL: + add_output_action(ctx, XFLOWP_LOCAL); + break; + default: + xflow_port = ofp_port_to_xflow_port(ntohs(oao->port)); + if (xflow_port != ctx->flow.in_port) { + add_output_action(ctx, xflow_port); + } + break; + } + + if (prev_nf_output_iface == NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_FLOOD; + } else if (ctx->nf_output_iface == NF_OUT_DROP) { + ctx->nf_output_iface = prev_nf_output_iface; + } else if (prev_nf_output_iface != NF_OUT_DROP && + ctx->nf_output_iface != NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_MULTI; + } +} + ++/* If the final xflow action in 'ctx' is "pop priority", drop it, as an ++ * optimization, because we're going to add another action that sets the ++ * priority immediately after, or because there are no actions following the ++ * pop. */ ++static void ++remove_pop_action(struct wx_xlate_ctx *ctx) ++{ ++ size_t n = ctx->out->n_actions; ++ if (n > 0 && ctx->out->actions[n - 1].type == XFLOWAT_POP_PRIORITY) { ++ ctx->out->n_actions--; ++ } ++} ++ ++static void ++xlate_enqueue_action(struct wx_xlate_ctx *ctx, ++ const struct ofp_action_enqueue *oae) ++{ ++ uint16_t ofp_port, xflow_port; ++ ++ /* Figure out xflow output port. */ ++ ofp_port = ntohs(oae->port); ++ if (ofp_port != OFPP_IN_PORT) { ++ xflow_port = ofp_port_to_xflow_port(ofp_port); ++ } else { ++ xflow_port = ctx->flow.in_port; ++ } ++ ++ /* Add xflow actions. */ ++ remove_pop_action(ctx); ++ xflow_actions_add(ctx->out, XFLOWAT_SET_PRIORITY)->priority.priority ++ = TC_H_MAKE(1, ntohl(oae->queue_id)); /* XXX */ ++ add_output_action(ctx, xflow_port); ++ xflow_actions_add(ctx->out, XFLOWAT_POP_PRIORITY); ++ ++ /* Update NetFlow output port. 
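++     *
++     * NF_OUT_DROP means nothing has been output yet, so this queue action's
++     * port becomes the NetFlow output interface; a second output collapses
++     * it to NF_OUT_MULTI, and NF_OUT_FLOOD is left alone, mirroring the
++     * bookkeeping at the end of xlate_output_action() above.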
*/ ++ if (ctx->nf_output_iface == NF_OUT_DROP) { ++ ctx->nf_output_iface = xflow_port; ++ } else if (ctx->nf_output_iface != NF_OUT_FLOOD) { ++ ctx->nf_output_iface = NF_OUT_MULTI; ++ } ++} ++ +static void +xlate_nicira_action(struct wx_xlate_ctx *ctx, + const struct nx_action_header *nah) +{ + const struct nx_action_resubmit *nar; + const struct nx_action_set_tunnel *nast; + union xflow_action *oa; + int subtype = ntohs(nah->subtype); + + assert(nah->vendor == htonl(NX_VENDOR_ID)); + switch (subtype) { + case NXAST_RESUBMIT: + nar = (const struct nx_action_resubmit *) nah; + xlate_table_action(ctx, ofp_port_to_xflow_port(ntohs(nar->in_port))); + break; + + case NXAST_SET_TUNNEL: + nast = (const struct nx_action_set_tunnel *) nah; + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TUNNEL); + ctx->flow.tun_id = oa->tunnel.tun_id = nast->tun_id; + break; + + /* If you add a new action here that modifies flow data, don't forget to - * update the flow key in ctx->flow in the same key. */ ++ * update the flow key in ctx->flow at the same time. */ + + default: + VLOG_DBG_RL(&rl, "unknown Nicira action type %"PRIu16, subtype); + break; + } +} + +static void +do_xlate_actions(const union ofp_action *in, size_t n_in, + struct wx_xlate_ctx *ctx) +{ + struct actions_iterator iter; + const union ofp_action *ia; + const struct wdp_port *port; + + port = port_array_get(&ctx->wx->ports, ctx->flow.in_port); + if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) && + port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, stp_eth_addr) + ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) { + /* Drop this flow. */ + return; + } + + for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) { + uint16_t type = ntohs(ia->type); + union xflow_action *oa; + + switch (type) { + case OFPAT_OUTPUT: + xlate_output_action(ctx, &ia->output); + break; + + case OFPAT_SET_VLAN_VID: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = ia->vlan_vid.vlan_vid & htons(VLAN_VID_MASK); + oa->dl_tci.mask = htons(VLAN_VID_MASK); + ctx->flow.dl_vlan = ia->vlan_vid.vlan_vid; + break; + + case OFPAT_SET_VLAN_PCP: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_TCI); + oa->dl_tci.tci = htons((ia->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT) + & VLAN_PCP_MASK); + oa->dl_tci.mask = htons(VLAN_PCP_MASK); + + if (ctx->flow.dl_vlan == htons(OFP_VLAN_NONE)) { + ctx->flow.dl_vlan = htons(0); + } + ctx->flow.dl_vlan_pcp = ia->vlan_pcp.vlan_pcp; + break; + + case OFPAT_STRIP_VLAN: + xflow_actions_add(ctx->out, XFLOWAT_STRIP_VLAN); + ctx->flow.dl_vlan = htons(OFP_VLAN_NONE); + ctx->flow.dl_vlan_pcp = 0; + break; + + case OFPAT_SET_DL_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_SRC); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + memcpy(ctx->flow.dl_src, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_DL_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_DL_DST); + memcpy(oa->dl_addr.dl_addr, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + memcpy(ctx->flow.dl_dst, + ((struct ofp_action_dl_addr *) ia)->dl_addr, ETH_ADDR_LEN); + break; + + case OFPAT_SET_NW_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_SRC); + ctx->flow.nw_src = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case OFPAT_SET_NW_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_NW_DST); + ctx->flow.nw_dst = oa->nw_addr.nw_addr = ia->nw_addr.nw_addr; + break; + + case OFPAT_SET_NW_TOS: + oa = xflow_actions_add(ctx->out, 
XFLOWAT_SET_NW_TOS); + ctx->flow.nw_tos = oa->nw_tos.nw_tos = ia->nw_tos.nw_tos; + break; + + case OFPAT_SET_TP_SRC: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_SRC); + ctx->flow.tp_src = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + + case OFPAT_SET_TP_DST: + oa = xflow_actions_add(ctx->out, XFLOWAT_SET_TP_DST); + ctx->flow.tp_dst = oa->tp_port.tp_port = ia->tp_port.tp_port; + break; + ++ case OFPAT_ENQUEUE: ++ xlate_enqueue_action(ctx, (const struct ofp_action_enqueue *) ia); ++ break; ++ + case OFPAT_VENDOR: + xlate_nicira_action(ctx, (const struct nx_action_header *) ia); + break; + + default: + VLOG_DBG_RL(&rl, "unknown action type %"PRIu16, type); + break; + } + } +} + +/* Returns true if 'flow' and 'actions' may be set up as a flow in the kernel. + * This is true most of the time, but we don't allow flows that would prevent + * DHCP replies from being seen by the local port to be set up in the + * kernel. + * + * We only need this, strictly speaking, when in-band control is turned on. */ +static bool +wx_may_set_up(const flow_t *flow, const struct xflow_actions *actions) +{ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { + int i; + + for (i = 0; i < actions->n_actions; i++) { + const struct xflow_action_output *oao = &actions->actions[i].output; + if (oao->type == XFLOWAT_OUTPUT && oao->port == XFLOWP_LOCAL) { + return true; + } + } + return false; + } + + return true; +} + +static int +wx_xlate_actions(struct wx *wx, const union ofp_action *in, size_t n_in, + const flow_t *flow, const struct ofpbuf *packet, + struct xflow_actions *out, bool *may_set_up_flow) +{ + //tag_type no_tags = 0; + struct wx_xlate_ctx ctx; + COVERAGE_INC(wx_ofp2xflow); + xflow_actions_init(out); + ctx.flow = *flow; + ctx.recurse = 0; + ctx.wx = wx; + ctx.packet = packet; + ctx.out = out; + //ctx.tags = tags ? 
tags : &no_tags; + ctx.may_set_up_flow = true; + ctx.nf_output_iface = NF_OUT_DROP; + do_xlate_actions(in, n_in, &ctx); ++ remove_pop_action(&ctx); + + if (may_set_up_flow) { + *may_set_up_flow = ctx.may_set_up_flow && wx_may_set_up(flow, out); + } +#if 0 + if (nf_output_iface) { + *nf_output_iface = ctx.nf_output_iface; + } +#endif + if (xflow_actions_overflow(out)) { + xflow_actions_init(out); + return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY); + } + return 0; +} + +static void +update_used(struct wx *wx) +{ + struct xflow_flow *flows; + size_t n_flows; + size_t i; + int error; + + error = xfif_flow_list_all(wx->xfif, &flows, &n_flows); + if (error) { + return; + } + + for (i = 0; i < n_flows; i++) { + struct xflow_flow *f = &flows[i]; + struct wx_rule *rule; + flow_t flow; + + xflow_key_to_flow(&f->key, &flow); + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, &flow)); + if (!rule || !rule->installed) { + COVERAGE_INC(wx_unexpected_rule); + xfif_flow_del(wx->xfif, f); + continue; + } + + wx_rule_update_time(wx, rule, &f->stats); + wx_rule_account(wx, rule, f->stats.n_bytes); + } + free(flows); +} + +static void +uninstall_idle_flow(struct wx *wx, struct wx_rule *rule) +{ + assert(rule->installed); + assert(!rule->wr.cr.flow.wildcards); + + if (rule->super) { + wx_rule_remove(wx, rule); + } else { + wx_rule_uninstall(wx, rule); + } +} + +static void +expire_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx *wx = wx_; + struct wx_rule *rule = wx_rule_cast(cls_rule); + long long int hard_expire, idle_expire, expire, now; + + hard_expire = (rule->wr.hard_timeout + ? rule->wr.created + rule->wr.hard_timeout * 1000 + : LLONG_MAX); + idle_expire = (rule->wr.idle_timeout + && (rule->super || list_is_empty(&rule->list)) + ? rule->used + rule->wr.idle_timeout * 1000 + : LLONG_MAX); + expire = MIN(hard_expire, idle_expire); + + now = time_msec(); + if (now < expire) { + if (rule->installed && now >= rule->used + 5000) { + uninstall_idle_flow(wx, rule); + } else if (!rule->wr.cr.flow.wildcards) { + //XXX active_timeout(wx, rule); + } + + return; + } + + COVERAGE_INC(wx_expired); + + /* Update stats. This code will be a no-op if the rule expired + * due to an idle timeout. */ + if (rule->wr.cr.flow.wildcards) { + struct wx_rule *subrule, *next; + LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) { + wx_rule_remove(wx, subrule); + } + } else { + wx_rule_uninstall(wx, rule); + } + +#if 0 /* XXX */ + if (!wx_rule_is_hidden(rule)) { + send_flow_removed(wx, rule, now, + (now >= hard_expire + ? OFPRR_HARD_TIMEOUT : OFPRR_IDLE_TIMEOUT)); + } +#endif + wx_rule_remove(wx, rule); +} + +struct revalidate_cbdata { + struct wx *wx; + bool revalidate_all; /* Revalidate all exact-match rules? */ + bool revalidate_subrules; /* Revalidate all exact-match subrules? */ + //struct tag_set revalidate_set; /* Set of tags to revalidate. 
*/ +}; + +static bool +revalidate_rule(struct wx *wx, struct wx_rule *rule) +{ + const flow_t *flow = &rule->wr.cr.flow; + + COVERAGE_INC(wx_revalidate_rule); + if (rule->super) { + struct wx_rule *super; + super = wx_rule_cast(classifier_lookup_wild(&wx->cls, flow)); + if (!super) { + wx_rule_remove(wx, rule); + return false; + } else if (super != rule->super) { + COVERAGE_INC(wx_revalidate_moved); + list_remove(&rule->list); + list_push_back(&super->list, &rule->list); + rule->super = super; + rule->wr.hard_timeout = super->wr.hard_timeout; + rule->wr.idle_timeout = super->wr.idle_timeout; + rule->wr.created = super->wr.created; + rule->used = 0; + } + } + + wx_rule_update_actions(wx, rule); + return true; +} + +static void +revalidate_cb(struct cls_rule *sub_, void *cbdata_) +{ + struct wx_rule *sub = wx_rule_cast(sub_); + struct revalidate_cbdata *cbdata = cbdata_; + + if (cbdata->revalidate_all + || (cbdata->revalidate_subrules && sub->super) + /*|| (tag_set_intersects(&cbdata->revalidate_set, sub->tags))*/) { + revalidate_rule(cbdata->wx, sub); + } +} + +static void +wx_run_one(struct wx *wx) +{ + wx_port_run(wx); + + if (time_msec() >= wx->next_expiration) { + COVERAGE_INC(wx_expiration); + wx->next_expiration = time_msec() + 1000; + update_used(wx); + + classifier_for_each(&wx->cls, CLS_INC_ALL, expire_rule, wx); + + /* XXX account_checkpoint_cb */ + } + + if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) { + struct revalidate_cbdata cbdata; + cbdata.wx = wx; + cbdata.revalidate_all = false; + cbdata.revalidate_subrules = wx->need_revalidate; + //cbdata.revalidate_set = wx->revalidate_set; + //tag_set_init(&wx->revalidate_set); + COVERAGE_INC(wx_revalidate); + classifier_for_each(&wx->cls, CLS_INC_EXACT, revalidate_cb, &cbdata); + wx->need_revalidate = false; + } +} + +static void +wx_run(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_run_one(wx); + } + xf_run(); +} + +static void +wx_wait_one(struct wx *wx) +{ + xfif_port_poll_wait(wx->xfif); + netdev_monitor_poll_wait(wx->netdev_monitor); + if (wx->need_revalidate /*|| !tag_set_is_empty(&p->revalidate_set)*/) { + poll_immediate_wake(); + } else if (wx->next_expiration != LLONG_MAX) { + poll_timer_wait_until(wx->next_expiration); + } +} + +static void +wx_wait(void) +{ + struct wx *wx; + + LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) { + wx_wait_one(wx); + } + xf_wait(); +} + +static int wx_flow_flush(struct wdp *); + +static int +wx_enumerate(const struct wdp_class *wdp_class, struct svec *all_wdps) +{ + struct svec names = SVEC_EMPTY_INITIALIZER; + int error = xf_enumerate_names(wdp_class->type, &names); + svec_move(all_wdps, &names); + return error; +} + +static int +wx_open(const struct wdp_class *wdp_class, const char *name, bool create, + struct wdp **wdpp) +{ + struct xfif *xfif; + int error; + + error = (create + ? 
xfif_create_and_open(name, wdp_class->type, &xfif) + : xfif_open(name, wdp_class->type, &xfif)); + if (!error) { + struct wx *wx; + + wx = xzalloc(sizeof *wx); + list_push_back(&all_wx, &wx->list_node); + wdp_init(&wx->wdp, wdp_class, name, 0, 0); + wx->xfif = xfif; + classifier_init(&wx->cls); + wx->netdev_monitor = netdev_monitor_create(); + port_array_init(&wx->ports); + shash_init(&wx->port_by_name); + wx->next_expiration = time_msec() + 1000; + + wx_port_init(wx); + + *wdpp = &wx->wdp; + } + + return error; +} + +static void +wx_close(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + wx_flow_flush(wdp); + xfif_close(wx->xfif); + classifier_destroy(&wx->cls); + netdev_monitor_destroy(wx->netdev_monitor); + list_remove(&wx->list_node); + free(wx); +} + +static int +wx_get_all_names(const struct wdp *wdp, struct svec *all_names) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_all_names(wx->xfif, all_names); +} + +static int +wx_destroy(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_delete(wx->xfif); +} + +static void +hton_ofp_phy_port(struct ofp_phy_port *opp) +{ + opp->port_no = htons(opp->port_no); + opp->config = htonl(opp->config); + opp->state = htonl(opp->state); + opp->curr = htonl(opp->curr); + opp->advertised = htonl(opp->advertised); + opp->supported = htonl(opp->supported); + opp->peer = htonl(opp->peer); +} + +static int +wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp) +{ + struct wx *wx = wx_cast(wdp); + struct ofp_switch_features *osf; + struct ofpbuf *buf; + unsigned int port_no; + struct wdp_port *port; + + buf = ofpbuf_new(sizeof *osf); + osf = ofpbuf_put_zeros(buf, sizeof *osf); + osf->n_tables = 2; + osf->capabilities = htonl(OFPC_ARP_MATCH_IP); + osf->actions = htonl((1u << OFPAT_OUTPUT) | + (1u << OFPAT_SET_VLAN_VID) | + (1u << OFPAT_SET_VLAN_PCP) | + (1u << OFPAT_STRIP_VLAN) | + (1u << OFPAT_SET_DL_SRC) | + (1u << OFPAT_SET_DL_DST) | + (1u << OFPAT_SET_NW_SRC) | + (1u << OFPAT_SET_NW_DST) | + (1u << OFPAT_SET_NW_TOS) | + (1u << OFPAT_SET_TP_SRC) | - (1u << OFPAT_SET_TP_DST)); ++ (1u << OFPAT_SET_TP_DST) | ++ (1u << OFPAT_ENQUEUE)); + + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp)); + } + + *featuresp = buf; + return 0; +} + +static void +count_subrules(struct cls_rule *cls_rule, void *n_subrules_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + int *n_subrules = n_subrules_; + + if (rule->super) { + (*n_subrules)++; + } +} + +static int +wx_get_stats(const struct wdp *wdp, struct wdp_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_stats xflow_stats; + int n_subrules; + int error; + + error = xfif_get_xf_stats(wx->xfif, &xflow_stats); + + n_subrules = 0; + classifier_for_each(&wx->cls, CLS_INC_EXACT, count_subrules, &n_subrules); + + stats->exact.n_flows = classifier_count_exact(&wx->cls) - n_subrules; + stats->exact.cur_capacity = xflow_stats.cur_capacity; + stats->exact.max_capacity = MIN(WX_MAX_EXACT, xflow_stats.max_capacity); + stats->exact.n_hit = xflow_stats.n_hit; + stats->exact.n_missed = xflow_stats.n_missed; + stats->exact.n_lost = xflow_stats.n_lost; + + stats->wild.n_flows = classifier_count_wild(&wx->cls); + stats->wild.cur_capacity = WX_MAX_WILD; + stats->wild.max_capacity = WX_MAX_WILD; + stats->wild.n_hit = 0; /* XXX */ + stats->wild.n_missed = 0; /* XXX */ + stats->wild.n_lost = 0; /* XXX */ + + stats->n_ports = xflow_stats.n_ports; + stats->max_ports = xflow_stats.max_ports; + + stats->n_frags = 
xflow_stats.n_frags; + + stats->max_miss_queue = xflow_stats.max_miss_queue; + stats->max_action_queue = xflow_stats.max_action_queue; + stats->max_sflow_queue = xflow_stats.max_sflow_queue; + + return error; +} + +static int +wx_get_drop_frags(const struct wdp *wdp, bool *drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_set_drop_frags(struct wdp *wdp, bool drop_frags) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_drop_frags(wx->xfif, drop_frags); +} + +static int +wx_port_add(struct wdp *wdp, const char *devname, + bool internal, uint16_t *port_no) +{ + struct wx *wx = wx_cast(wdp); + uint16_t xflow_flags = internal ? XFLOW_PORT_INTERNAL : 0; + return xfif_port_add(wx->xfif, devname, xflow_flags, port_no); +} + +static int +wx_port_del(struct wdp *wdp, uint16_t port_no) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_port_del(wx->xfif, port_no); +} + +static int +wx_answer_port_query(const struct wdp_port *port, struct wdp_port *portp) +{ + if (port) { + wdp_port_copy(portp, port); + return 0; + } else { + return ENOENT; + } +} + +static int +wx_port_query_by_number(const struct wdp *wdp, uint16_t port_no, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + const struct wdp_port *port; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + return wx_answer_port_query(port, portp); +} + +static int +wx_port_query_by_name(const struct wdp *wdp, const char *devname, + struct wdp_port *portp) +{ + struct wx *wx = wx_cast(wdp); + + return wx_answer_port_query(shash_find_data(&wx->port_by_name, devname), + portp); +} + +static int +wx_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *port; + uint32_t changes; + + port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no)); + if (!port) { + return ENOENT; + } + changes = config ^ port->opp.config; + + if (changes & OFPPC_PORT_DOWN) { + int error; + if (config & OFPPC_PORT_DOWN) { + error = netdev_turn_flags_off(port->netdev, NETDEV_UP, true); + } else { + error = netdev_turn_flags_on(port->netdev, NETDEV_UP, true); + } + if (!error) { + port->opp.config ^= OFPPC_PORT_DOWN; + } + } + +#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD) + if (changes & REVALIDATE_BITS) { + COVERAGE_INC(wx_costly_flags); + port->opp.config ^= changes & REVALIDATE_BITS; + wx->need_revalidate = true; + } +#undef REVALIDATE_BITS + + if (changes & OFPPC_NO_FLOOD) { + port->opp.config ^= OFPPC_NO_FLOOD; + wx_port_refresh_groups(wx); + } + + if (changes & OFPPC_NO_PACKET_IN) { + port->opp.config ^= OFPPC_NO_PACKET_IN; + } + + return 0; +} + +static int +wx_port_list(const struct wdp *wdp, struct wdp_port **portsp, size_t *n_portsp) +{ + struct wx *wx = wx_cast(wdp); + struct wdp_port *ports, *port; + unsigned int port_no; + size_t n_ports, i; + + *n_portsp = n_ports = port_array_count(&wx->ports); + *portsp = ports = xmalloc(n_ports * sizeof *ports); + i = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + wdp_port_copy(&ports[i++], port); + } + assert(i == n_ports); + + return 0; +} + +static int +wx_port_poll(const struct wdp *wdp, char **devnamep) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_port_poll(wx->xfif, devnamep); +} + +static void +wx_port_poll_wait(const struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_port_poll_wait(wx->xfif); +} + +static struct wdp_rule * +wx_flow_get(const struct wdp *wdp, const flow_t *flow) +{ + 
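+    /* This is an exact lookup, not a match: a wildcarded flow is found only
+     * if 'flow' specifies the identical wildcards. Hidden subrules are never
+     * returned to the client. */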
struct wx *wx = wx_cast(wdp);
+    struct wx_rule *rule;
+
+    rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, flow));
+    return rule && !wx_rule_is_hidden(rule) ? &rule->wr : NULL;
+}
+
+static struct wdp_rule *
+wx_flow_match(const struct wdp *wdp, const flow_t *flow)
+{
+    struct wx *wx = wx_cast(wdp);
+    struct wx_rule *rule;
+
+    rule = wx_rule_cast(classifier_lookup(&wx->cls, flow));
+    if (rule) {
+        if (wx_rule_is_hidden(rule)) {
+            rule = rule->super;
+        }
+        return &rule->wr;
+    } else {
+        return NULL;
+    }
+}
+
+struct wx_for_each_thunk_aux {
+    wdp_flow_cb_func *client_callback;
+    void *client_aux;
+};
+
+static void
+wx_for_each_thunk(struct cls_rule *cls_rule, void *aux_)
+{
+    struct wx_for_each_thunk_aux *aux = aux_;
+    struct wx_rule *rule = wx_rule_cast(cls_rule);
+
+    if (!wx_rule_is_hidden(rule)) {
+        aux->client_callback(&rule->wr, aux->client_aux);
+    }
+}
+
+static void
+wx_flow_for_each_match(const struct wdp *wdp, const flow_t *target,
+                       int include,
+                       wdp_flow_cb_func *client_callback, void *client_aux)
+{
+    struct wx *wx = wx_cast(wdp);
+    struct wx_for_each_thunk_aux aux;
+
+    aux.client_callback = client_callback;
+    aux.client_aux = client_aux;
+    classifier_for_each_match(&wx->cls, target, include,
+                              wx_for_each_thunk, &aux);
+}
+
+/* Obtains statistics for 'rule' within 'wx' and stores them into '*stats'.
+ * If 'rule' is a wildcarded rule, the returned statistics include statistics
+ * for all of 'rule''s subrules. */
+static void
+query_stats(struct wx *wx, struct wx_rule *rule, struct wdp_flow_stats *stats)
+{
+    struct wx_rule *subrule;
+    struct xflow_flow *xflow_flows;
+    size_t n_xflow_flows;
+
+    /* Start from historical data for 'rule' itself that are no longer tracked
+     * by the datapath. This counts, for example, subrules that have
+     * expired. */
+    stats->n_packets = rule->packet_count;
+    stats->n_bytes = rule->byte_count;
+    stats->inserted = rule->wr.created;
+    stats->used = LLONG_MIN;
+    stats->tcp_flags = 0;
+    stats->ip_tos = 0;
+
+    /* Prepare to ask the datapath for statistics on 'rule', or if it is
+     * wildcarded then on all of its subrules.
+     *
+     * Also, add any statistics that are not tracked by the datapath for each
+     * subrule. This includes, for example, statistics for packets that were
+     * executed "by hand" via xfif_execute() but must be accounted to a
+     * flow. */
+    n_xflow_flows = rule->wr.cr.flow.wildcards ? list_size(&rule->list) : 1;
+    xflow_flows = xzalloc(n_xflow_flows * sizeof *xflow_flows);
+    if (rule->wr.cr.flow.wildcards) {
+        size_t i = 0;
+        LIST_FOR_EACH (subrule, struct wx_rule, list, &rule->list) {
+            xflow_key_from_flow(&xflow_flows[i++].key, &subrule->wr.cr.flow);
+            stats->n_packets += subrule->packet_count;
+            stats->n_bytes += subrule->byte_count;
+        }
+    } else {
+        xflow_key_from_flow(&xflow_flows[0].key, &rule->wr.cr.flow);
+    }
+
+    /* Fetch up-to-date statistics from the datapath and add them in.
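+     *
+     * 'used' ends up as the most recent use time across every fetched flow;
+     * 'ip_tos' is taken from whichever TCP flow supplied that time, and
+     * 'tcp_flags' accumulates the OR of every flow's flags.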
*/ + if (!xfif_flow_get_multiple(wx->xfif, xflow_flows, n_xflow_flows)) { + size_t i; + for (i = 0; i < n_xflow_flows; i++) { + struct xflow_flow *xflow_flow = &xflow_flows[i]; + long long int used; + + stats->n_packets += xflow_flow->stats.n_packets; + stats->n_bytes += xflow_flow->stats.n_bytes; + used = xflow_flow_stats_to_msec(&xflow_flow->stats); + if (used > stats->used) { + stats->used = used; + if (xflow_flow->key.dl_type == htons(ETH_TYPE_IP) + && xflow_flow->key.nw_proto == IP_TYPE_TCP) { + stats->ip_tos = xflow_flow->stats.ip_tos; + } + } + stats->tcp_flags |= xflow_flow->stats.tcp_flags; + } + } + free(xflow_flows); +} + +static int +wx_flow_get_stats(const struct wdp *wdp, + const struct wdp_rule *wdp_rule, + struct wdp_flow_stats *stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + query_stats(wx, rule, stats); + return 0; +} + +static bool +wx_flow_overlaps(const struct wdp *wdp, const flow_t *flow) +{ + struct wx *wx = wx_cast(wdp); + + /* XXX overlap with a subrule? */ + return classifier_rule_overlaps(&wx->cls, flow); +} + +static int +wx_flow_put(struct wdp *wdp, const struct wdp_flow_put *put, + struct wdp_flow_stats *old_stats, struct wdp_rule **rulep) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule; + + rule = wx_rule_cast(classifier_find_rule_exactly(&wx->cls, put->flow)); + if (rule && wx_rule_is_hidden(rule)) { + rule = NULL; + } + + if (rule) { + if (!(put->flags & WDP_PUT_MODIFY)) { + return EEXIST; + } + } else { + if (!(put->flags & WDP_PUT_CREATE)) { + return EINVAL; + } + if ((put->flow->wildcards + ? classifier_count_wild(&wx->cls) >= WX_MAX_WILD + : classifier_count_exact(&wx->cls) >= WX_MAX_EXACT)) { + /* XXX subrules should not count against exact-match limit */ + return ENOBUFS; + } + } + + rule = wx_rule_create(NULL, put->actions, put->n_actions, + put->idle_timeout, put->hard_timeout); + cls_rule_from_flow(put->flow, &rule->wr.cr); + wx_rule_insert(wx, rule, NULL, 0); + + if (old_stats) { + /* XXX */ + memset(old_stats, 0, sizeof *old_stats); + } + if (rulep) { + *rulep = &rule->wr; + } + + return 0; +} + +static int +wx_flow_delete(struct wdp *wdp, struct wdp_rule *wdp_rule, + struct wdp_flow_stats *final_stats) +{ + struct wx *wx = wx_cast(wdp); + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + + wx_rule_remove(wx, rule); + if (final_stats) { + memset(final_stats, 0, sizeof *final_stats); /* XXX */ + } + return 0; +} + +static void +wx_flush_rule(struct cls_rule *cls_rule, void *wx_) +{ + struct wx_rule *rule = wx_rule_cast(cls_rule); + struct wx *wx = wx_; + + /* Mark the flow as not installed, even though it might really be + * installed, so that wx_rule_remove() doesn't bother trying to uninstall + * it. There is no point in uninstalling it individually since we are + * about to blow away all the flows with xfif_flow_flush(). 
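+     *
+     * wx_flow_flush() below issues that single xfif_flow_flush() call once
+     * every rule has been swept out of the classifier.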
*/ + rule->installed = false; + + wx_rule_remove(wx, rule); +} + +static int +wx_flow_flush(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + COVERAGE_INC(wx_flow_flush); + classifier_for_each(&wx->cls, CLS_INC_ALL, wx_flush_rule, wx); + xfif_flow_flush(wx->xfif); + return 0; +} + +static int +wx_execute(struct wdp *wdp, uint16_t in_port, + const union ofp_action actions[], int n_actions, + const struct ofpbuf *packet) +{ + struct wx *wx = wx_cast(wdp); + struct xflow_actions xflow_actions; + flow_t flow; + int error; + + flow_extract((struct ofpbuf *) packet, 0, in_port, &flow); + error = wx_xlate_actions(wx, actions, n_actions, &flow, packet, + &xflow_actions, NULL); + if (error) { + return error; + } + xfif_execute(wx->xfif, ofp_port_to_xflow_port(in_port), + xflow_actions.actions, xflow_actions.n_actions, packet); + return 0; +} + +static int +wx_flow_inject(struct wdp *wdp, struct wdp_rule *wdp_rule, + uint16_t in_port, const struct ofpbuf *packet) +{ + struct wx_rule *rule = wx_rule_cast(&wdp_rule->cr); + int error; + + error = wx_execute(wdp, in_port, rule->wr.actions, rule->wr.n_actions, + packet); + if (!error) { + rule->packet_count++; + rule->byte_count += packet->size; + rule->used = time_msec(); + } + return error; +} + +static int +wx_recv_get_mask(const struct wdp *wdp, int *listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + int error; + + error = xfif_recv_get_mask(wx->xfif, &xflow_listen_mask); + if (!error) { + *listen_mask = 0; + if (xflow_listen_mask & XFLOWL_MISS) { + *listen_mask |= 1 << WDP_CHAN_MISS; + } + if (xflow_listen_mask & XFLOWL_ACTION) { + *listen_mask |= 1 << WDP_CHAN_ACTION; + } + if (xflow_listen_mask & XFLOWL_SFLOW) { + *listen_mask |= 1 << WDP_CHAN_SFLOW; + } + } + return error; +} + +static int +wx_recv_set_mask(struct wdp *wdp, int listen_mask) +{ + struct wx *wx = wx_cast(wdp); + int xflow_listen_mask; + + xflow_listen_mask = 0; + if (listen_mask & (1 << WDP_CHAN_MISS)) { + xflow_listen_mask |= XFLOWL_MISS; + } + if (listen_mask & (1 << WDP_CHAN_ACTION)) { + xflow_listen_mask |= XFLOWL_ACTION; + } + if (listen_mask & (1 << WDP_CHAN_SFLOW)) { + xflow_listen_mask |= XFLOWL_SFLOW; + } + + return xfif_recv_set_mask(wx->xfif, xflow_listen_mask); +} + +static int +wx_get_sflow_probability(const struct wdp *wdp, uint32_t *probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_get_sflow_probability(wx->xfif, probability); +} + +static int +wx_set_sflow_probability(struct wdp *wdp, uint32_t probability) +{ + struct wx *wx = wx_cast(wdp); + + return xfif_set_sflow_probability(wx->xfif, probability); +} + +static int +wx_translate_xflow_msg(struct xflow_msg *msg, struct ofpbuf *payload, + struct wdp_packet *packet) +{ + packet->in_port = xflow_port_to_ofp_port(msg->port); + packet->send_len = 0; + packet->tun_id = 0; + + switch (msg->type) { + case _XFLOWL_MISS_NR: + packet->channel = WDP_CHAN_MISS; + packet->payload = payload; + packet->tun_id = msg->arg; + return 0; + + case _XFLOWL_ACTION_NR: + packet->channel = WDP_CHAN_ACTION; + packet->payload = payload; + packet->send_len = msg->arg; + return 0; + + case _XFLOWL_SFLOW_NR: + /* XXX */ + ofpbuf_delete(payload); + return ENOSYS; + + default: + VLOG_WARN_RL(&rl, "received XFLOW message of unexpected type %"PRIu32, + msg->type); + ofpbuf_delete(payload); + return ENOSYS; + } +} + +static const uint8_t * +get_local_mac(const struct wx *wx) +{ + const struct wdp_port *port = port_array_get(&wx->ports, XFLOWP_LOCAL); + return port ? 
port->opp.hw_addr : NULL;
+}
+
+/* Returns true if 'packet' is a DHCP reply to the local port. Such a reply
+ * should be sent to the local port regardless of the flow table.
+ *
+ * We only need this, strictly speaking, when in-band control is turned on. */
+static bool
+wx_is_local_dhcp_reply(const struct wx *wx,
+                       const flow_t *flow, const struct ofpbuf *packet)
+{
+    if (flow->dl_type == htons(ETH_TYPE_IP)
+        && flow->nw_proto == IP_TYPE_UDP
+        && flow->tp_src == htons(DHCP_SERVER_PORT)
+        && flow->tp_dst == htons(DHCP_CLIENT_PORT)
+        && packet->l7)
+    {
+        const uint8_t *local_mac = get_local_mac(wx);
+        struct dhcp_header *dhcp = ofpbuf_at(
+            packet, (char *)packet->l7 - (char *)packet->data, sizeof *dhcp);
+        return dhcp && local_mac && eth_addr_equals(dhcp->chaddr, local_mac);
+    }
+
+    return false;
+}
+
+static bool
+wx_explode_rule(struct wx *wx, struct xflow_msg *msg, struct ofpbuf *payload)
+{
+    struct wx_rule *rule;
+    flow_t flow;
+
+    flow_extract(payload, 0, xflow_port_to_ofp_port(msg->port), &flow);
+
+    if (wx_is_local_dhcp_reply(wx, &flow, payload)) {
+        union xflow_action action;
+
+        memset(&action, 0, sizeof(action));
+        action.output.type = XFLOWAT_OUTPUT;
+        action.output.port = XFLOWP_LOCAL;
+        xfif_execute(wx->xfif, msg->port, &action, 1, payload);
+    }
+
+    rule = wx_rule_lookup_valid(wx, &flow);
+    if (!rule) {
+        return false;
+    }
+
+    if (rule->wr.cr.flow.wildcards) {
+        rule = wx_rule_create_subrule(wx, rule, &flow);
+        wx_rule_make_actions(wx, rule, payload);
+    } else {
+        if (!rule->may_install) {
+            /* The rule is not installable, that is, we need to process every
+             * packet, so process the current packet and set its actions on
+             * 'rule'. */
+            wx_rule_make_actions(wx, rule, payload);
+        } else {
+            /* XXX revalidate rule if it needs it */
+        }
+    }
+
+    wx_rule_execute(wx, rule, payload, &flow);
+    wx_rule_reinstall(wx, rule);
+
+    return true;
+}
+
+static int
+wx_recv(struct wdp *wdp, struct wdp_packet *packet)
+{
+    struct wx *wx = wx_cast(wdp);
+    int i;
+
+    /* XXX need to avoid 50*50 potential cost for caller.
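+     *
+     * The loop below may consume up to 50 datapath messages per call while
+     * looking for one that wx_explode_rule() does not fully handle; a caller
+     * that itself polls in a loop of similar size multiplies that bound,
+     * which is the cost the XXX refers to.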
*/ + for (i = 0; i < 50; i++) { + struct xflow_msg *msg; + struct ofpbuf *buf; + int error; + + error = xfif_recv(wx->xfif, &buf); + if (error) { + return error; + } + + msg = ofpbuf_pull(buf, sizeof *msg); + if (msg->type != _XFLOWL_MISS_NR || !wx_explode_rule(wx, msg, buf)) { + return wx_translate_xflow_msg(msg, buf, packet); + } + ofpbuf_delete(buf); + } + return EAGAIN; +} + +static void +wx_recv_wait(struct wdp *wdp) +{ + struct wx *wx = wx_cast(wdp); + + xfif_recv_wait(wx->xfif); +} + +static void wx_port_update(struct wx *, const char *devname); +static void wx_port_reinit(struct wx *); + +static void +wx_port_process_change(struct wx *wx, int error, char *devname) +{ + if (error == ENOBUFS) { + wx_port_reinit(wx); + } else if (!error) { + wx_port_update(wx, devname); + free(devname); + } +} + +static void +wx_port_run(struct wx *wx) +{ + char *devname; + int error; + + while ((error = xfif_port_poll(wx->xfif, &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname); + } + while ((error = netdev_monitor_poll(wx->netdev_monitor, + &devname)) != EAGAIN) { + wx_port_process_change(wx, error, devname); + } +} + +static size_t +wx_port_refresh_group(struct wx *wx, unsigned int group) +{ + uint16_t *ports; + size_t n_ports; + struct wdp_port *port; + unsigned int port_no; + + assert(group == WX_GROUP_ALL || group == WX_GROUP_FLOOD); + + ports = xmalloc(port_array_count(&wx->ports) * sizeof *ports); + n_ports = 0; + PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) { + if (group == WX_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) { + ports[n_ports++] = port_no; + } + } + xfif_port_group_set(wx->xfif, group, ports, n_ports); + free(ports); + + return n_ports; +} + +static void +wx_port_refresh_groups(struct wx *wx) +{ + wx_port_refresh_group(wx, WX_GROUP_FLOOD); + wx_port_refresh_group(wx, WX_GROUP_ALL); +} + +static void +wx_port_reinit(struct wx *wx) +{ + struct svec devnames; + struct wdp_port *wdp_port; + unsigned int port_no; + struct xflow_port *xflow_ports; + size_t n_xflow_ports; + size_t i; + + svec_init(&devnames); + PORT_ARRAY_FOR_EACH (wdp_port, &wx->ports, port_no) { + svec_add (&devnames, (char *) wdp_port->opp.name); + } + xfif_port_list(wx->xfif, &xflow_ports, &n_xflow_ports); + for (i = 0; i < n_xflow_ports; i++) { + svec_add(&devnames, xflow_ports[i].devname); + } + free(xflow_ports); + + svec_sort_unique(&devnames); + for (i = 0; i < devnames.n; i++) { + wx_port_update(wx, devnames.names[i]); + } + svec_destroy(&devnames); + + wx_port_refresh_groups(wx); +} + +static struct wdp_port * +make_wdp_port(const struct xflow_port *xflow_port) +{ + struct netdev_options netdev_options; + enum netdev_flags flags; + struct wdp_port *wdp_port; + struct netdev *netdev; + bool carrier; + int error; + + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = xflow_port->devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; + + error = netdev_open(&netdev_options, &netdev); + if (error) { + VLOG_WARN_RL(&rl, "ignoring port %s (%"PRIu16") because netdev %s " + "cannot be opened (%s)", + xflow_port->devname, xflow_port->port, + xflow_port->devname, strerror(error)); + return NULL; + } + + wdp_port = xmalloc(sizeof *wdp_port); + wdp_port->netdev = netdev; + wdp_port->opp.port_no = xflow_port_to_ofp_port(xflow_port->port); + netdev_get_etheraddr(netdev, wdp_port->opp.hw_addr); + strncpy((char *) wdp_port->opp.name, xflow_port->devname, + sizeof wdp_port->opp.name); + wdp_port->opp.name[sizeof wdp_port->opp.name - 1] = '\0'; + + 
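+    /* Fill in the OpenFlow view of the device's current state: 'config' from
+     * the interface up/down flag, 'state' from carrier, and the four feature
+     * bitmaps from the netdev layer. */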
netdev_get_flags(netdev, &flags); + wdp_port->opp.config = flags & NETDEV_UP ? 0 : OFPPC_PORT_DOWN; + + netdev_get_carrier(netdev, &carrier); + wdp_port->opp.state = carrier ? 0 : OFPPS_LINK_DOWN; + + netdev_get_features(netdev, + &wdp_port->opp.curr, &wdp_port->opp.advertised, + &wdp_port->opp.supported, &wdp_port->opp.peer); + + wdp_port->devname = xstrdup(xflow_port->devname); + wdp_port->internal = (xflow_port->flags & XFLOW_PORT_INTERNAL) != 0; + return wdp_port; +} + +static bool +wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port) +{ + if (port_array_get(&wx->ports, xflow_port->port)) { + VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath", + xflow_port->port); + return true; + } else if (shash_find(&wx->port_by_name, xflow_port->devname)) { + VLOG_WARN_RL(&rl, "ignoring duplicate device %s in datapath", + xflow_port->devname); + return true; + } else { + return false; + } +} + +static int +wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_) +{ + const struct ofp_phy_port *a = &a_->opp; + const struct ofp_phy_port *b = &b_->opp; + + BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. */ + return (a->port_no == b->port_no + && !memcmp(a->hw_addr, b->hw_addr, sizeof a->hw_addr) + && !strcmp((char *) a->name, (char *) b->name) + && a->state == b->state + && a->config == b->config + && a->curr == b->curr + && a->advertised == b->advertised + && a->supported == b->supported + && a->peer == b->peer); +} + +static void +wx_port_install(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + const char *netdev_name = (const char *) wdp_port->opp.name; + + netdev_monitor_add(wx->netdev_monitor, wdp_port->netdev); + port_array_set(&wx->ports, xflow_port, wdp_port); + shash_add(&wx->port_by_name, netdev_name, wdp_port); +} + +static void +wx_port_remove(struct wx *wx, struct wdp_port *wdp_port) +{ + uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no); + + netdev_monitor_remove(wx->netdev_monitor, wdp_port->netdev); - port_array_set(&wx->ports, xflow_port, NULL); ++ port_array_delete(&wx->ports, xflow_port); + shash_delete(&wx->port_by_name, + shash_find(&wx->port_by_name, (char *) wdp_port->opp.name)); +} + +static void +wx_port_free(struct wdp_port *wdp_port) +{ + if (wdp_port) { + netdev_close(wdp_port->netdev); + free(wdp_port); + } +} + +static void +wx_port_update(struct wx *wx, const char *devname) +{ + struct xflow_port xflow_port; + struct wdp_port *old_wdp_port; + struct wdp_port *new_wdp_port; + int error; + + COVERAGE_INC(wx_update_port); + + /* Query the datapath for port information. */ + error = xfif_port_query_by_name(wx->xfif, devname, &xflow_port); + + /* Find the old wdp_port. */ + old_wdp_port = shash_find_data(&wx->port_by_name, devname); + if (!error) { + if (!old_wdp_port) { + /* There's no port named 'devname' but there might be a port with + * the same port number. This could happen if a port is deleted + * and then a new one added in its place very quickly, or if a port + * is renamed. In the former case we want to send an OFPPR_DELETE + * and an OFPPR_ADD, and in the latter case we want to send a + * single OFPPR_MODIFY. We can distinguish the cases by comparing + * the old port's ifindex against the new port, or perhaps less + * reliably but more portably by comparing the old port's MAC + * against the new port's MAC. However, this code isn't that smart + * and always sends an OFPPR_MODIFY (XXX). 
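+             *
+             * In that case, the lookup by port number below supplies the port
+             * to treat as the old one.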
*/ + old_wdp_port = port_array_get(&wx->ports, xflow_port.port); + } + } else if (error != ENOENT && error != ENODEV) { + VLOG_WARN_RL(&rl, "xfif_port_query_by_name returned unexpected error " + "%s", strerror(error)); + return; + } + + /* Create a new wdp_port. */ + new_wdp_port = !error ? make_wdp_port(&xflow_port) : NULL; + + /* Eliminate a few pathological cases. */ + if (!old_wdp_port && !new_wdp_port) { + return; + } else if (old_wdp_port && new_wdp_port) { + /* Most of the 'config' bits are OpenFlow soft state, but + * OFPPC_PORT_DOWN is maintained by the kernel. So transfer the + * OpenFlow bits from old_wdp_port. (make_wdp_port() only sets + * OFPPC_PORT_DOWN and leaves the other bits 0.) */ + new_wdp_port->opp.config |= old_wdp_port->opp.config & ~OFPPC_PORT_DOWN; + + if (wdp_port_equal(old_wdp_port, new_wdp_port)) { + /* False alarm--no change. */ + wx_port_free(new_wdp_port); + return; + } + } + + /* Now deal with the normal cases. */ + if (old_wdp_port) { + wx_port_remove(wx, old_wdp_port); + } + if (new_wdp_port) { + wx_port_install(wx, new_wdp_port); + } + wx_port_free(old_wdp_port); + + /* Update port groups. */ + wx_port_refresh_groups(wx); +} + +static int +wx_port_init(struct wx *wx) +{ + struct xflow_port *ports; + size_t n_ports; + size_t i; + int error; + + error = xfif_port_list(wx->xfif, &ports, &n_ports); + if (error) { + return error; + } + + for (i = 0; i < n_ports; i++) { + const struct xflow_port *xflow_port = &ports[i]; + if (!wx_port_conflicts(wx, xflow_port)) { + struct wdp_port *wdp_port = make_wdp_port(xflow_port); + if (wdp_port) { + wx_port_install(wx, wdp_port); + } + } + } + free(ports); + wx_port_refresh_groups(wx); + return 0; +} + +void +wdp_xflow_register(void) +{ + static const struct wdp_class wdp_xflow_class = { + NULL, /* name */ + wx_run, + wx_wait, + wx_enumerate, + wx_open, + wx_close, + wx_get_all_names, + wx_destroy, + wx_get_features, + wx_get_stats, + wx_get_drop_frags, + wx_set_drop_frags, + wx_port_add, + wx_port_del, + wx_port_query_by_number, + wx_port_query_by_name, + wx_port_list, + wx_port_set_config, + wx_port_poll, + wx_port_poll_wait, + wx_flow_get, + wx_flow_match, + wx_flow_for_each_match, + wx_flow_get_stats, + wx_flow_overlaps, + wx_flow_put, + wx_flow_delete, + wx_flow_flush, + wx_flow_inject, + wx_execute, + wx_recv_get_mask, + wx_recv_set_mask, + wx_get_sflow_probability, + wx_set_sflow_probability, + wx_recv, + wx_recv_wait, + }; + + static bool inited = false; + + struct svec types; + const char *type; + bool registered; + int i; + + if (inited) { + return; + } + inited = true; + + svec_init(&types); + xf_enumerate_types(&types); + + registered = false; + SVEC_FOR_EACH (i, type, &types) { + struct wdp_class *class; + + class = xmalloc(sizeof *class); + *class = wdp_xflow_class; + class->type = xstrdup(type); + if (registered) { + class->run = NULL; + class->wait = NULL; + } + if (!wdp_register_provider(class)) { + registered = true; + } + } + + svec_destroy(&types); +} diff --cc utilities/ovs-ofctl.c index 2c9082cc6,c2f4feffa..4eeec5613 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@@ -36,8 -36,10 +36,9 @@@ #include "dynamic-string.h" #include "netdev.h" #include "netlink.h" -#include "odp-util.h" +#include "xflow-util.h" #include "ofp-print.h" + #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" diff --cc vswitchd/bridge.c index 3ec45b51c,47f269f9e..3c5d9f695 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@@ -164,15 -163,8 +163,8 @@@ struct bridge /* 
OpenFlow switch processing. */ struct ofproto *ofproto; /* OpenFlow switch. */ - /* Description strings. */ - char *mfr_desc; /* Manufacturer. */ - char *hw_desc; /* Hardware. */ - char *sw_desc; /* Software version. */ - char *serial_desc; /* Serial number. */ - char *dp_desc; /* Datapath description. */ - /* Kernel datapath information. */ - struct dpif *dpif; /* Datapath. */ + struct xfif *xfif; /* Datapath. */ struct port_array ifaces; /* Indexed by kernel datapath port number. */ /* Bridge ports. */ @@@ -260,10 -249,11 +249,11 @@@ static struct iface *iface_create(struc const struct ovsrec_interface *if_cfg); static void iface_destroy(struct iface *); static struct iface *iface_lookup(const struct bridge *, const char *name); -static struct iface *iface_from_dp_ifidx(const struct bridge *, - uint16_t dp_ifidx); +static struct iface *iface_from_xf_ifidx(const struct bridge *, + uint16_t xf_ifidx); static bool iface_is_internal(const struct bridge *, const char *name); static void iface_set_mac(struct iface *); + static void iface_update_qos(struct iface *, const struct ovsrec_qos *); /* Hooks into ofproto processing. */ static struct ofhooks bridge_ofhooks;