From 0a740f48293efa6162accc385f06def13b15f49b Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Thu, 20 Dec 2012 15:32:03 -0800 Subject: [PATCH] ofproto-dpif: Implement patch ports in userspace. This commit moves responsibility for implementing patch ports from the datapath to ofproto-dpif. There are two main reasons to do this. The first is a matter of design: ofproto-dpif both has more information than the datapath, and is better suited to handle the complexity required to implement patch ports. The second is performance. My setup is a virtual machine with two basic learning bridges connected by patch ports. I used ovs-benchmark to ping the virtual router IP residing outside the VM. Over a 60 second run, "ovs-benchmark rate" improves from 14618.1 to 19311.9 transactions per second, or a 32% improvement. Similarly, "ovs-benchmark latency" improves from 6ms to 4ms. Signed-off-by: Ethan Jackson --- FAQ | 35 +++-- NEWS | 1 + lib/netdev-provider.h | 1 + lib/netdev-vport.c | 285 +++++++++++++++++++++-------------------- lib/netdev-vport.h | 10 +- lib/netdev.c | 8 ++ lib/netdev.h | 1 + ofproto/ofproto-dpif.c | 180 ++++++++++++++++++++++---- tests/ofproto-dpif.at | 57 +++++++++ 9 files changed, 397 insertions(+), 181 deletions(-) diff --git a/FAQ b/FAQ index 72a147978..9fb60f754 100644 --- a/FAQ +++ b/FAQ @@ -158,28 +158,25 @@ Q: What features are not available in the Open vSwitch kernel datapath that ships as part of the upstream Linux kernel? A: The kernel module in upstream Linux 3.3 and later does not include - the following features: - - - Tunnel virtual ports, that is, interfaces with type "gre", - "ipsec_gre", "capwap". It is possible to create tunnels in - Linux and attach them to Open vSwitch as system devices. - However, they cannot be dynamically created through the OVSDB - protocol or set the tunnel ids as a flow action. - - Work is in progress in adding these features to the upstream - Linux version of the Open vSwitch kernel module. 
For now, if - you need these features, use the kernel module from the Open - vSwitch distribution instead of the upstream Linux kernel - module. - - - Patch virtual ports, that is, interfaces with type "patch". - You can use Linux "veth" devices as a substitute. - - We don't have any plans to add patch ports upstream. + tunnel virtual ports, that is, interfaces with type "gre", + "ipsec_gre", "gre64", "ipsec_gre64", "vxlan", or "capwap". It is + possible to create tunnels in Linux and attach them to Open vSwitch + as system devices. However, they cannot be dynamically created + through the OVSDB protocol or set the tunnel ids as a flow action. + + Work is in progress in adding tunnel virtual ports to the upstream + Linux version of the Open vSwitch kernel module. For now, if you + need these features, use the kernel module from the Open vSwitch + distribution instead of the upstream Linux kernel module. + + The upstream kernel module does not include patch ports, but this + only matters for Open vSwitch 1.9 and earlier, because Open vSwitch + 1.10 and later implement patch ports without using this kernel + feature. Q: What features are not available when using the userspace datapath? -A: Tunnel and patch virtual ports are not supported, as described in the +A: Tunnel virtual ports are not supported, as described in the previous answer. It is also not possible to use queue-related actions. On Linux kernels before 2.6.39, maximum-sized VLAN packets may not be transmitted. diff --git a/NEWS b/NEWS index 6cf09ba4f..62488d002 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,7 @@ post-v1.9.0 retire that meaning of ANY in favor of the OpenFlow 1.1 meaning. - Inheritance of the Don't Fragment bit in IP tunnels (df_inherit) is no longer supported. + - Patch ports are implemented in userspace. 
v1.9.0 - xx xxx xxxx diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 9db950cf7..c7de2c2e4 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -601,6 +601,7 @@ const struct netdev_class *netdev_lookup_provider(const char *type); extern const struct netdev_class netdev_linux_class; extern const struct netdev_class netdev_internal_class; extern const struct netdev_class netdev_tap_class; +extern const struct netdev_class netdev_patch_class; #ifdef __FreeBSD__ extern const struct netdev_class netdev_bsd_class; #endif diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 60437b989..cd6ae5444 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -29,6 +29,7 @@ #include "byte-order.h" #include "daemon.h" #include "dirs.h" +#include "dpif.h" #include "dpif-linux.h" #include "hash.h" #include "hmap.h" @@ -56,21 +57,21 @@ VLOG_DEFINE_THIS_MODULE(netdev_vport); struct netdev_dev_vport { struct netdev_dev netdev_dev; - struct ofpbuf *options; unsigned int change_seq; uint8_t etheraddr[ETH_ADDR_LEN]; + + /* Tunnels. */ + struct ofpbuf *options; struct netdev_tunnel_config tnl_cfg; + + /* Patch Ports. 
*/ + struct netdev_stats stats; + char *peer; }; struct vport_class { enum ovs_vport_type type; struct netdev_class netdev_class; - int (*parse_config)(const char *name, const char *type, - const struct smap *args, struct ofpbuf *options, - struct netdev_tunnel_config *tnl_cfg); - int (*unparse_config)(const char *name, const char *type, - const struct nlattr *options, size_t options_len, - struct smap *args); }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -140,6 +141,12 @@ netdev_vport_get_vport_type(const struct netdev *netdev) : OVS_VPORT_TYPE_UNSPEC); } +bool +netdev_vport_is_patch(const struct netdev *netdev) +{ + return netdev_vport_get_vport_type(netdev) == OVS_VPORT_TYPE_PATCH; +} + static uint32_t get_u32_or_zero(const struct nlattr *a) { @@ -220,6 +227,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_) ofpbuf_delete(netdev_dev->options); route_table_unregister(); + free(netdev_dev->peer); free(netdev_dev); } @@ -237,83 +245,6 @@ netdev_vport_close(struct netdev *netdev) free(netdev); } -static int -netdev_vport_get_config(struct netdev_dev *dev_, struct smap *args) -{ - const struct netdev_class *netdev_class = netdev_dev_get_class(dev_); - const struct vport_class *vport_class = vport_class_cast(netdev_class); - struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); - const char *name = netdev_dev_get_name(dev_); - int error; - - if (!dev->options) { - struct dpif_linux_vport reply; - struct ofpbuf *buf; - - error = dpif_linux_vport_get(name, &reply, &buf); - if (error) { - VLOG_ERR_RL(&rl, "%s: vport query failed (%s)", - name, strerror(error)); - return error; - } - - dev->options = ofpbuf_clone_data(reply.options, reply.options_len); - ofpbuf_delete(buf); - } - - error = vport_class->unparse_config(name, netdev_class->type, - dev->options->data, - dev->options->size, - args); - if (error) { - VLOG_ERR_RL(&rl, "%s: failed to parse kernel config (%s)", - name, strerror(error)); - } - return error; -} - -static int 
-netdev_vport_set_config(struct netdev_dev *dev_, const struct smap *args) -{ - const struct netdev_class *netdev_class = netdev_dev_get_class(dev_); - const struct vport_class *vport_class = vport_class_cast(netdev_class); - struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); - const char *name = netdev_dev_get_name(dev_); - struct netdev_tunnel_config tnl_cfg; - struct ofpbuf *options; - int error; - - options = ofpbuf_new(64); - error = vport_class->parse_config(name, netdev_dev_get_type(dev_), - args, options, &tnl_cfg); - if (!error - && (!dev->options - || options->size != dev->options->size - || memcmp(options->data, dev->options->data, options->size))) { - struct dpif_linux_vport vport; - - dpif_linux_vport_init(&vport); - vport.cmd = OVS_VPORT_CMD_SET; - vport.name = name; - vport.options = options->data; - vport.options_len = options->size; - error = dpif_linux_vport_transact(&vport, NULL, NULL); - if (!error || error == ENODEV) { - /* Either reconfiguration succeeded or this vport is not installed - * in the kernel (e.g. it hasn't been added to a dpif yet with - * dpif_port_add()). */ - ofpbuf_delete(dev->options); - dev->options = options; - dev->tnl_cfg = tnl_cfg; - options = NULL; - error = 0; - } - } - ofpbuf_delete(options); - - return error; -} - static int netdev_vport_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN]) @@ -449,7 +380,7 @@ netdev_vport_poll_notify(const struct netdev *netdev) } } -/* Code specific to individual vport types. */ +/* Code specific to tunnel types. 
*/ static ovs_be64 parse_key(const struct smap *args, const char *name, @@ -479,13 +410,16 @@ parse_key(const struct smap *args, const char *name, } static int -parse_tunnel_config(const char *name, const char *type, - const struct smap *args, struct ofpbuf *options, - struct netdev_tunnel_config *tnl_cfg_) +set_tunnel_config(struct netdev_dev *dev_, const struct smap *args) { + struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); + const char *name = netdev_dev_get_name(dev_); + const char *type = netdev_dev_get_type(dev_); bool ipsec_mech_set, needs_dst_port, has_csum; struct netdev_tunnel_config tnl_cfg; struct smap_node *node; + struct ofpbuf *options; + int error = EINVAL; uint8_t flags; flags = TNL_F_DF_DEFAULT; @@ -493,6 +427,8 @@ parse_tunnel_config(const char *name, const char *type, ipsec_mech_set = false; memset(&tnl_cfg, 0, sizeof tnl_cfg); + options = ofpbuf_new(64); + if (!strcmp(type, "capwap")) { VLOG_WARN_ONCE("CAPWAP tunnel support is deprecated."); } @@ -572,7 +508,7 @@ parse_tunnel_config(const char *name, const char *type, if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) { VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument", name); - return EINVAL; + goto exit; } ipsec_mech_set = true; } @@ -610,25 +546,25 @@ parse_tunnel_config(const char *name, const char *type, if (pid < 0) { VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon", name); - return EINVAL; + goto exit; } if (smap_get(args, "peer_cert") && smap_get(args, "psk")) { VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name); - return EINVAL; + goto exit; } if (!ipsec_mech_set) { VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument", name); - return EINVAL; + goto exit; } } if (!tnl_cfg.ip_dst) { VLOG_ERR("%s: %s type requires valid 'remote_ip' argument", name, type); - return EINVAL; + goto exit; } nl_msg_put_be32(options, OVS_TUNNEL_ATTR_DST_IPV4, tnl_cfg.ip_dst); @@ -660,9 +596,34 @@ parse_tunnel_config(const char *name, const char *type, } 
nl_msg_put_u32(options, OVS_TUNNEL_ATTR_FLAGS, flags); - *tnl_cfg_ = tnl_cfg; + dev->tnl_cfg = tnl_cfg; - return 0; + error = 0; + if (!dev->options + || options->size != dev->options->size + || memcmp(options->data, dev->options->data, options->size)) { + struct dpif_linux_vport vport; + + dpif_linux_vport_init(&vport); + vport.cmd = OVS_VPORT_CMD_SET; + vport.name = name; + vport.options = options->data; + vport.options_len = options->size; + error = dpif_linux_vport_transact(&vport, NULL, NULL); + if (!error || error == ENODEV) { + /* Either reconfiguration succeeded or this vport is not installed + * in the kernel (e.g. it hasn't been added to a dpif yet with + * dpif_port_add()). */ + ofpbuf_delete(dev->options); + dev->options = options; + options = NULL; + error = 0; + } + } + +exit: + ofpbuf_delete(options); + return error; } static int @@ -696,16 +657,34 @@ get_be64_or_zero(const struct nlattr *a) } static int -unparse_tunnel_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED, - const struct nlattr *options, size_t options_len, - struct smap *args) +get_tunnel_config(struct netdev_dev *dev_, struct smap *args) { + struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); + const char *name = netdev_dev_get_name(dev_); struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1]; uint32_t flags; int error; - error = tnl_port_config_from_nlattr(options, options_len, a); + if (!dev->options) { + struct dpif_linux_vport reply; + struct ofpbuf *buf; + + error = dpif_linux_vport_get(name, &reply, &buf); + if (error) { + VLOG_ERR_RL(&rl, "%s: vport query failed (%s)", name, + strerror(error)); + return error; + } + + dev->options = ofpbuf_clone_data(reply.options, reply.options_len); + ofpbuf_delete(buf); + } + + error = tnl_port_config_from_nlattr(dev->options->data, dev->options->size, + a); if (error) { + VLOG_ERR_RL(&rl, "%s: failed to parse kernel config (%s)", + name, strerror(error)); return error; } @@ -779,16 +758,57 @@ unparse_tunnel_config(const char *name 
OVS_UNUSED, const char *type OVS_UNUSED, return 0; } + +/* Code specific to patch ports. */ + +const char * +netdev_vport_patch_peer(const struct netdev *netdev) +{ + return netdev_vport_is_patch(netdev) + ? netdev_vport_get_dev(netdev)->peer + : NULL; +} + +void +netdev_vport_patch_inc_rx(const struct netdev *netdev, + const struct dpif_flow_stats *stats) +{ + if (netdev_vport_is_patch(netdev)) { + struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev); + dev->stats.rx_packets += stats->n_packets; + dev->stats.rx_bytes += stats->n_bytes; + } +} + +void +netdev_vport_patch_inc_tx(const struct netdev *netdev, + const struct dpif_flow_stats *stats) +{ + if (netdev_vport_is_patch(netdev)) { + struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev); + dev->stats.tx_packets += stats->n_packets; + dev->stats.tx_bytes += stats->n_bytes; + } +} + +static int +get_patch_config(struct netdev_dev *dev_, struct smap *args) +{ + struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); + + if (dev->peer) { + smap_add(args, "peer", dev->peer); + } + return 0; +} static int -parse_patch_config(const char *name, const char *type OVS_UNUSED, - const struct smap *args, struct ofpbuf *options, - struct netdev_tunnel_config *tnl_cfg) +set_patch_config(struct netdev_dev *dev_, const struct smap *args) { + struct netdev_dev_vport *dev = netdev_dev_vport_cast(dev_); + const char *name = netdev_dev_get_name(dev_); const char *peer; - memset(tnl_cfg, 0, sizeof *tnl_cfg); - peer = smap_get(args, "peer"); if (!peer) { VLOG_ERR("%s: patch type requires valid 'peer' argument", name); @@ -800,54 +820,36 @@ parse_patch_config(const char *name, const char *type OVS_UNUSED, return EINVAL; } - if (strlen(peer) >= IFNAMSIZ) { - VLOG_ERR("%s: patch 'peer' arg too long", name); - return EINVAL; - } - if (!strcmp(name, peer)) { VLOG_ERR("%s: patch peer must not be self", name); return EINVAL; } - nl_msg_put_string(options, OVS_PATCH_ATTR_PEER, peer); + free(dev->peer); + dev->peer = 
xstrdup(peer); return 0; } static int -unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED, - const struct nlattr *options, size_t options_len, - struct smap *args) -{ - static const struct nl_policy ovs_patch_policy[] = { - [OVS_PATCH_ATTR_PEER] = { .type = NL_A_STRING, - .max_len = IFNAMSIZ, - .optional = false } - }; - - struct nlattr *a[ARRAY_SIZE(ovs_patch_policy)]; - struct ofpbuf buf; - - ofpbuf_use_const(&buf, options, options_len); - if (!nl_policy_parse(&buf, 0, ovs_patch_policy, - a, ARRAY_SIZE(ovs_patch_policy))) { - return EINVAL; - } - - smap_add(args, "peer", nl_attr_get_string(a[OVS_PATCH_ATTR_PEER])); +patch_get_stats(const struct netdev *netdev, struct netdev_stats *stats) +{ + struct netdev_dev_vport *dev = netdev_vport_get_dev(netdev); + memcpy(stats, &dev->stats, sizeof *stats); return 0; } -#define VPORT_FUNCTIONS(GET_TUNNEL_CONFIG, GET_STATUS) \ +#define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \ + GET_TUNNEL_CONFIG, GET_STATS, \ + GET_STATUS) \ NULL, \ netdev_vport_run, \ netdev_vport_wait, \ \ netdev_vport_create, \ netdev_vport_destroy, \ - netdev_vport_get_config, \ - netdev_vport_set_config, \ + GET_CONFIG, \ + SET_CONFIG, \ GET_TUNNEL_CONFIG, \ \ netdev_vport_open, \ @@ -869,7 +871,7 @@ unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED, NULL, /* get_carrier */ \ NULL, /* get_carrier_resets */ \ NULL, /* get_miimon */ \ - netdev_vport_get_stats, \ + GET_STATS, \ NULL, /* set_stats */ \ \ NULL, /* get_features */ \ @@ -901,9 +903,11 @@ unparse_patch_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED, #define TUNNEL_CLASS(NAME, VPORT_TYPE) \ { VPORT_TYPE, \ - { NAME, VPORT_FUNCTIONS(get_netdev_tunnel_config, \ - tunnel_get_status) }, \ - parse_tunnel_config, unparse_tunnel_config } + { NAME, VPORT_FUNCTIONS(get_tunnel_config, \ + set_tunnel_config, \ + get_netdev_tunnel_config, \ + netdev_vport_get_stats, \ + tunnel_get_status) }} void netdev_vport_register(void) @@ -917,8 
+921,11 @@ netdev_vport_register(void) TUNNEL_CLASS("vxlan", OVS_VPORT_TYPE_VXLAN), { OVS_VPORT_TYPE_PATCH, - { "patch", VPORT_FUNCTIONS(NULL, NULL) }, - parse_patch_config, unparse_patch_config } + { "patch", VPORT_FUNCTIONS(get_patch_config, + set_patch_config, + NULL, + patch_get_stats, + NULL) }}, }; int i; diff --git a/lib/netdev-vport.h b/lib/netdev-vport.h index 31c119870..b372a7448 100644 --- a/lib/netdev-vport.h +++ b/lib/netdev-vport.h @@ -18,9 +18,9 @@ #define NETDEV_VPORT_H 1 #include -#include "openvswitch/types.h" struct dpif_linux_vport; +struct dpif_flow_stats; struct netdev; struct netdev_stats; @@ -30,7 +30,15 @@ const struct ofpbuf *netdev_vport_get_options(const struct netdev *); enum ovs_vport_type netdev_vport_get_vport_type(const struct netdev *); const char *netdev_vport_get_netdev_type(const struct dpif_linux_vport *); +bool netdev_vport_is_patch(const struct netdev *); int netdev_vport_get_stats(const struct netdev *, struct netdev_stats *); +const char *netdev_vport_patch_peer(const struct netdev *netdev); + +void netdev_vport_patch_inc_rx(const struct netdev *, + const struct dpif_flow_stats *); +void netdev_vport_patch_inc_tx(const struct netdev *, + const struct dpif_flow_stats *); + #endif /* netdev-vport.h */ diff --git a/lib/netdev.c b/lib/netdev.c index 0a2e7c51e..3909ab21d 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -1441,6 +1441,14 @@ netdev_get_type(const struct netdev *netdev) return netdev_get_dev(netdev)->netdev_class->type; } + +const char * +netdev_get_type_from_name(const char *name) +{ + const struct netdev_dev *dev = netdev_dev_from_name(name); + return dev ? netdev_dev_get_type(dev) : NULL; +} + struct netdev_dev * netdev_get_dev(const struct netdev *netdev) { diff --git a/lib/netdev.h b/lib/netdev.h index a54413172..a691d70b2 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -127,6 +127,7 @@ const struct netdev_tunnel_config * /* Basic properties. 
*/ const char *netdev_get_name(const struct netdev *); const char *netdev_get_type(const struct netdev *); +const char *netdev_get_type_from_name(const char *); int netdev_get_mtu(const struct netdev *, int *mtup); int netdev_set_mtu(const struct netdev *, int mtu); int netdev_get_ifindex(const struct netdev *); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 2f782671b..d75a63ccc 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -36,6 +36,7 @@ #include "mac-learning.h" #include "meta-flow.h" #include "multipath.h" +#include "netdev-vport.h" #include "netdev.h" #include "netlink.h" #include "nx-match.h" @@ -295,6 +296,8 @@ static void xlate_actions(struct action_xlate_ctx *, static void xlate_actions_for_side_effects(struct action_xlate_ctx *, const struct ofpact *ofpacts, size_t ofpacts_len); +static void xlate_table_action(struct action_xlate_ctx *, uint16_t in_port, + uint8_t table_id, bool may_packet_in); static size_t put_userspace_action(const struct ofproto_dpif *, struct ofpbuf *odp_actions, @@ -676,7 +679,8 @@ struct ofproto_dpif { struct hmap vlandev_map; /* vlandev -> (realdev,vid). */ /* Ports. */ - struct sset ports; /* Set of port names. */ + struct sset ports; /* Set of standard port names. */ + struct sset ghost_ports; /* Ports with no datapath port. */ struct sset port_poll_set; /* Queued names for port_poll() reply. */ int port_poll_errno; /* Last errno for port_poll() reply. 
*/ }; @@ -1158,6 +1162,7 @@ construct(struct ofproto *ofproto_) hmap_init(&ofproto->realdev_vid_map); sset_init(&ofproto->ports); + sset_init(&ofproto->ghost_ports); sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; @@ -1302,6 +1307,7 @@ destruct(struct ofproto *ofproto_) hmap_destroy(&ofproto->realdev_vid_map); sset_destroy(&ofproto->ports); + sset_destroy(&ofproto->ghost_ports); sset_destroy(&ofproto->port_poll_set); close_dpif_backer(ofproto->backer); @@ -1535,6 +1541,12 @@ port_construct(struct ofport *port_) port->vlandev_vid = 0; port->carrier_seq = netdev_get_carrier_resets(port->up.netdev); + if (netdev_vport_is_patch(port->up.netdev)) { + /* XXX By bailing out here, we don't do required sFlow work. */ + port->odp_port = OVSP_NONE; + return 0; + } + error = dpif_port_query_by_name(ofproto->backer->dpif, netdev_get_name(port->up.netdev), &dpif_port); @@ -1577,8 +1589,12 @@ port_destruct(struct ofport *port_) dpif_port_del(ofproto->backer->dpif, port->odp_port); } + if (port->odp_port != OVSP_NONE) { + hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node); + } + sset_find_and_delete(&ofproto->ports, devname); - hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node); + sset_find_and_delete(&ofproto->ghost_ports, devname); ofproto->backer->need_revalidate = REV_RECONFIGURE; bundle_remove(port_); set_cfm(port_, NULL); @@ -2820,6 +2836,28 @@ ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto, ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no); } +static struct ofport_dpif * +ofport_get_peer(const struct ofport_dpif *ofport_dpif) +{ + const struct ofproto_dpif *ofproto; + const char *peer; + + peer = netdev_vport_patch_peer(ofport_dpif->up.netdev); + if (!peer) { + return NULL; + } + + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + struct ofport *ofport; + + ofport = shash_find_data(&ofproto->up.port_by_name, peer); + if (ofport && ofport->ofproto->ofproto_class 
== &ofproto_dpif_class) { + return ofport_dpif_cast(ofport); + } + } + return NULL; +} + static void port_run_fast(struct ofport_dpif *ofport) { @@ -2888,6 +2926,24 @@ port_query_by_name(const struct ofproto *ofproto_, const char *devname, struct dpif_port dpif_port; int error; + if (sset_contains(&ofproto->ghost_ports, devname)) { + const char *type = netdev_get_type_from_name(devname); + + /* We may be called before ofproto->up.port_by_name is populated with + * the appropriate ofport. For this reason, we must get the name and + * type from the netdev layer directly. */ + if (type) { + const struct ofport *ofport; + + ofport = shash_find_data(&ofproto->up.port_by_name, devname); + ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE; + ofproto_port->name = xstrdup(devname); + ofproto_port->type = xstrdup(type); + return 0; + } + return ENODEV; + } + if (!sset_contains(&ofproto->ports, devname)) { return ENODEV; } @@ -2906,6 +2962,11 @@ port_add(struct ofproto *ofproto_, struct netdev *netdev) uint32_t odp_port = UINT32_MAX; int error; + if (netdev_vport_is_patch(netdev)) { + sset_add(&ofproto->ghost_ports, netdev_get_name(netdev)); + return 0; + } + error = dpif_port_add(ofproto->backer->dpif, netdev, &odp_port); if (!error) { sset_add(&ofproto->ports, netdev_get_name(netdev)); @@ -2996,16 +3057,13 @@ ofproto_update_local_port_stats(const struct ofproto *ofproto_, struct port_dump_state { uint32_t bucket; uint32_t offset; + bool ghost; }; static int port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep) { - struct port_dump_state *state; - - *statep = state = xmalloc(sizeof *state); - state->bucket = 0; - state->offset = 0; + *statep = xzalloc(sizeof(struct port_dump_state)); return 0; } @@ -3015,10 +3073,11 @@ port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_, { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct port_dump_state *state = state_; + const struct sset *sset; struct sset_node *node; - 
while ((node = sset_at_position(&ofproto->ports, &state->bucket, - &state->offset))) { + sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports; + while ((node = sset_at_position(sset, &state->bucket, &state->offset))) { int error; error = port_query_by_name(ofproto_, node->name, port); @@ -3027,6 +3086,13 @@ port_dump_next(const struct ofproto *ofproto_ OVS_UNUSED, void *state_, } } + if (!state->ghost) { + state->ghost = true; + state->bucket = 0; + state->offset = 0; + return port_dump_next(ofproto_, state_, port); + } + return EOF; } @@ -4103,9 +4169,7 @@ facet_free(struct facet *facet) } /* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on - * 'packet', which arrived on 'in_port'. - * - * Takes ownership of 'packet'. */ + * 'packet', which arrived on 'in_port'. */ static bool execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, const struct nlattr *odp_actions, size_t actions_len, @@ -4121,8 +4185,6 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, error = dpif_execute(ofproto->backer->dpif, key.data, key.size, odp_actions, actions_len, packet); - - ofpbuf_delete(packet); return !error; } @@ -5174,11 +5236,10 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes) } } -static enum ofperr -rule_execute(struct rule *rule_, const struct flow *flow, - struct ofpbuf *packet) +static void +rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow, + struct ofpbuf *packet) { - struct rule_dpif *rule = rule_dpif_cast(rule_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); struct dpif_flow_stats stats; @@ -5200,7 +5261,14 @@ rule_execute(struct rule *rule_, const struct flow *flow, odp_actions.size, packet); ofpbuf_uninit(&odp_actions); +} +static enum ofperr +rule_execute(struct rule *rule, const struct flow *flow, + struct ofpbuf *packet) +{ + rule_dpif_execute(rule_dpif_cast(rule), flow, packet); + ofpbuf_delete(packet); return 0; } @@ -5226,6 
+5294,29 @@ send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) int error; flow_extract(packet, 0, 0, NULL, OFPP_LOCAL, &flow); + if (netdev_vport_is_patch(ofport->up.netdev)) { + struct ofproto_dpif *peer_ofproto; + struct dpif_flow_stats stats; + struct ofport_dpif *peer; + struct rule_dpif *rule; + + peer = ofport_get_peer(ofport); + if (!peer) { + return ENODEV; + } + + dpif_flow_stats_extract(&flow, packet, time_msec(), &stats); + netdev_vport_patch_inc_tx(ofport->up.netdev, &stats); + netdev_vport_patch_inc_rx(peer->up.netdev, &stats); + + flow.in_port = peer->up.ofp_port; + peer_ofproto = ofproto_dpif_cast(peer->up.ofproto); + rule = rule_dpif_lookup(peer_ofproto, &flow); + rule_dpif_execute(rule, &flow, packet); + + return 0; + } + odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port, flow.vlan_tci); if (odp_port != ofport->odp_port) { @@ -5412,11 +5503,14 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, bool check_stp) { const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port); - uint32_t odp_port = ofp_port_to_odp_port(ctx->ofproto, ofp_port); ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci; uint8_t flow_nw_tos = ctx->flow.nw_tos; struct priority_to_dscp *pdscp; - uint32_t out_port; + uint32_t out_port, odp_port; + + /* If 'struct flow' gets additional metadata, we'll need to zero it out + * before traversing a patch port. 
*/ + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 18); if (!ofport) { xlate_report(ctx, "Nonexistent output port"); @@ -5429,12 +5523,46 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, return; } + if (netdev_vport_is_patch(ofport->up.netdev)) { + struct ofport_dpif *peer = ofport_get_peer(ofport); + struct flow old_flow = ctx->flow; + const struct ofproto_dpif *peer_ofproto; + + if (!peer) { + xlate_report(ctx, "Nonexistent patch port peer"); + return; + } + + peer_ofproto = ofproto_dpif_cast(peer->up.ofproto); + if (peer_ofproto->backer != ctx->ofproto->backer) { + xlate_report(ctx, "Patch port peer on a different datapath"); + return; + } + + ctx->ofproto = ofproto_dpif_cast(peer->up.ofproto); + ctx->flow.in_port = peer->up.ofp_port; + ctx->flow.metadata = htonll(0); + memset(&ctx->flow.tunnel, 0, sizeof ctx->flow.tunnel); + memset(ctx->flow.regs, 0, sizeof ctx->flow.regs); + xlate_table_action(ctx, ctx->flow.in_port, 0, true); + ctx->flow = old_flow; + ctx->ofproto = ofproto_dpif_cast(ofport->up.ofproto); + + if (ctx->resubmit_stats) { + netdev_vport_patch_inc_tx(ofport->up.netdev, ctx->resubmit_stats); + netdev_vport_patch_inc_rx(peer->up.netdev, ctx->resubmit_stats); + } + + return; + } + pdscp = get_priority(ofport, ctx->flow.skb_priority); if (pdscp) { ctx->flow.nw_tos &= ~IP_DSCP_MASK; ctx->flow.nw_tos |= pdscp->dscp; } + odp_port = ofp_port_to_odp_port(ctx->ofproto, ofp_port); out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port, ctx->flow.vlan_tci); if (out_port != odp_port) { @@ -7544,9 +7672,17 @@ show_dp_format(const struct ofproto_dpif *ofproto, struct ds *ds) struct ofport *ofport = node->data; const char *name = netdev_get_name(ofport->netdev); const char *type = netdev_get_type(ofport->netdev); + uint32_t odp_port; + + ds_put_format(ds, "\t%s %u/", name, ofport->ofp_port); + + odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port); + if (odp_port != OVSP_NONE) { + ds_put_format(ds, "%"PRIu32":", odp_port); + } else { + 
ds_put_cstr(ds, "none:"); + } - ds_put_format(ds, "\t%s %u/%u:", name, ofport->ofp_port, - ofp_port_to_odp_port(ofproto, ofport->ofp_port)); if (strcmp(type, "system")) { struct netdev *netdev; int error; diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index a14c41261..067c1da28 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -1324,3 +1324,60 @@ in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ofproto-dpif - patch ports]) +OVS_VSWITCHD_START([add-br br1 \ +-- set bridge br1 datapath-type=dummy fail-mode=secure \ +-- add-port br1 pbr1 -- set int pbr1 type=patch options:peer=pbr0 \ +-- add-port br0 pbr0 -- set int pbr0 type=patch options:peer=pbr1]) + +ADD_OF_PORTS([br0], [2]) +ADD_OF_PORTS([br1], [3]) + +AT_CHECK([ovs-ofctl add-flow br0 actions=LOCAL,output:1,output:2]) +AT_CHECK([ovs-ofctl add-flow br1 actions=LOCAL,output:1,output:3]) + +for i in $(seq 1 10); do + ovs-appctl netdev-dummy/receive br0 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' +done + +for i in $(seq 1 5); do + ovs-appctl netdev-dummy/receive br1 'in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' +done + +AT_CHECK([ovs-appctl dpif/show], [0], [dnl +br0 (dummy@ovs-dummy): + lookups: hit:13 missed:2 lost:0 + flows: 1 + br0 65534/100: (dummy) + p2 2/2: (dummy) + pbr0 1/none: (patch: peer=pbr1) +br1 (dummy@ovs-dummy): + lookups: hit:13 missed:2 lost:0 + flows: 1 + br1 65534/101: (dummy) + p3 3/3: (dummy) + pbr1 1/none: (patch: peer=pbr0) +]) + +AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_USED], [0], [dnl 
+in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:9, bytes:540, used:0.0s, actions:101,3,2 +]) +AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_USED], [0], [dnl +in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:4, bytes:240, used:0.0s, actions:100,2,3 +]) + +AT_CHECK([ovs-ofctl dump-ports br0 pbr0], [0], [dnl +OFPST_PORT reply (xid=0x4): 1 ports + port 1: rx pkts=5, bytes=300, drop=0, errs=0, frame=0, over=0, crc=0 + tx pkts=10, bytes=600, drop=0, errs=0, coll=0 +]) + +AT_CHECK([ovs-ofctl dump-ports br1 pbr1], [0], [dnl +OFPST_PORT reply (xid=0x4): 1 ports + port 1: rx pkts=10, bytes=600, drop=0, errs=0, frame=0, over=0, crc=0 + tx pkts=5, bytes=300, drop=0, errs=0, coll=0 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP -- 2.43.0