From 572f732ab0789f66dba46825218f0cc62b8fb8c2 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 4 Mar 2014 15:36:03 -0800 Subject: [PATCH] dpif-netdev: user space datapath recirculation Add basic recirculation infrastructure and user space data path support for it. The following bond mega flow patch will make use of this infrastructure. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- include/linux/openvswitch.h | 29 ++++++++++- lib/dpif-netdev.c | 20 +++++++- lib/dpif.c | 3 +- lib/odp-execute.c | 9 ++++ lib/odp-execute.h | 2 +- lib/odp-util.c | 98 +++++++++++++++++++++++++++++++++++-- lib/packets.h | 9 +++- ofproto/ofproto-dpif.h | 58 ++++++++++++++++++++++ 8 files changed, 218 insertions(+), 10 deletions(-) diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index e39e4377b..d9282d675 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -307,11 +307,13 @@ enum ovs_key_attr { OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ - #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ #endif + OVS_KEY_ATTR_DP_HASH = 20, /* u32 hash value */ + OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ + OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ @@ -532,6 +534,29 @@ struct ovs_action_push_vlan { __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; +/* Data path hash algorithm for computing Datapath hash. + * + * The Algorithm type only specifies the fields in a flow + * will be used as part of the hash. Each datapath is free + * to use its own hash algorithm. The hash value will be + * opaque to the user space daemon. + */ +enum ovs_recirc_hash_alg { + OVS_RECIRC_HASH_ALG_NONE, + OVS_RECIRC_HASH_ALG_L4, +}; +/* + * struct ovs_action_recirc - %OVS_ACTION_ATTR_RECIRC action argument. 
+ * @recirc_id: The recirculation label; zero is invalid. + * @hash_alg: Algorithm used to compute hash prior to recirculation. + * @hash_bias: Bias used for computing the hash prior to recirculation. + */ +struct ovs_action_recirc { + uint32_t hash_alg; /* One of enum ovs_recirc_hash_alg. */ + uint32_t hash_bias; + uint32_t recirc_id; /* Recirculation label. */ +}; + /** * enum ovs_action_attr - Action types. * @@ -555,6 +580,7 @@ struct ovs_action_push_vlan { * indicate the new packet contents. This could potentially still be * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -571,6 +597,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + OVS_ACTION_ATTR_RECIRC, /* struct ovs_action_recirc. 
*/ __OVS_ACTION_ATTR_MAX }; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 4d0462907..8687a4725 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2082,7 +2082,7 @@ struct dp_netdev_execute_aux { static void dp_execute_cb(void *aux_, struct ofpbuf *packet, - const struct pkt_metadata *md OVS_UNUSED, + struct pkt_metadata *md, const struct nlattr *a, bool may_steal) OVS_NO_THREAD_SAFETY_ANALYSIS { @@ -2114,6 +2114,24 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, } break; } + + case OVS_ACTION_ATTR_RECIRC: { + const struct ovs_action_recirc *act; + act = nl_attr_get(a); + md->recirc_id =act->recirc_id; + md->dp_hash = 0; + + if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) { + struct flow flow; + + flow_extract(packet, md, &flow); + md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias); + } + + dp_netdev_port_input(aux->dp, packet, md); + break; + } + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_MPLS: diff --git a/lib/dpif.c b/lib/dpif.c index dbf1c101d..664917663 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1108,7 +1108,7 @@ struct dpif_execute_helper_aux { * meaningful. 
*/ static void dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet, - const struct pkt_metadata *md, + struct pkt_metadata *md, const struct nlattr *action, bool may_steal OVS_UNUSED) { struct dpif_execute_helper_aux *aux = aux_; @@ -1133,6 +1133,7 @@ dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet, case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_RECIRC: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } diff --git a/lib/odp-execute.c b/lib/odp-execute.c index cf33eb779..6e04816a5 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -125,6 +125,14 @@ odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a, set_arp(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_arp))); break; + case OVS_KEY_ATTR_DP_HASH: + md->dp_hash = nl_attr_get_u32(a); + break; + + case OVS_KEY_ATTR_RECIRC_ID: + md->recirc_id = nl_attr_get_u32(a); + break; + case OVS_KEY_ATTR_UNSPEC: case OVS_KEY_ATTR_ENCAP: case OVS_KEY_ATTR_ETHERTYPE: @@ -197,6 +205,7 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal, /* These only make sense in the context of a datapath. 
*/ case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_RECIRC: if (dp_execute_action) { bool may_steal; /* Allow 'dp_execute_action' to steal the packet data if we do diff --git a/lib/odp-execute.h b/lib/odp-execute.h index 6f1b9bd77..91f0c5183 100644 --- a/lib/odp-execute.h +++ b/lib/odp-execute.h @@ -28,7 +28,7 @@ struct ofpbuf; struct pkt_metadata; typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet, - const struct pkt_metadata *, + struct pkt_metadata *, const struct nlattr *action, bool may_steal); /* Actions that need to be executed in the context of a datapath are handed diff --git a/lib/odp-util.c b/lib/odp-util.c index 7c6aad4f3..956fef11b 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -79,6 +79,7 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_POP_VLAN: return 0; case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls); case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); + case OVS_ACTION_ATTR_RECIRC: return sizeof(struct ovs_action_recirc); case OVS_ACTION_ATTR_SET: return -2; case OVS_ACTION_ATTR_SAMPLE: return -2; @@ -118,6 +119,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; case OVS_KEY_ATTR_MPLS: return "mpls"; + case OVS_KEY_ATTR_DP_HASH: return "dp_hash"; + case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id"; case __OVS_KEY_ATTR_MAX: default: @@ -383,6 +386,19 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key, } } +static void +format_odp_recirc_action(struct ds *ds, + const struct ovs_action_recirc *act) +{ + ds_put_format(ds, "recirc("); + + if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) { + ds_put_format(ds, "hash_l4(%"PRIu32"), ", act->hash_bias); + } + + ds_put_format(ds, "%"PRIu32")", act->recirc_id); +} + static void format_odp_action(struct ds *ds, const struct nlattr *a) { @@ -405,6 +421,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a) 
case OVS_ACTION_ATTR_USERSPACE: format_odp_userspace_action(ds, a); break; + case OVS_ACTION_ATTR_RECIRC: + format_odp_recirc_action(ds, nl_attr_get(a)); + break; case OVS_ACTION_ATTR_SET: ds_put_cstr(ds, "set("); format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true); @@ -730,6 +749,8 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_ENCAP: return -2; case OVS_KEY_ATTR_PRIORITY: return 4; case OVS_KEY_ATTR_SKB_MARK: return 4; + case OVS_KEY_ATTR_DP_HASH: return 4; + case OVS_KEY_ATTR_RECIRC_ID: return 4; case OVS_KEY_ATTR_TUNNEL: return -2; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); @@ -1025,6 +1046,8 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_DP_HASH: + case OVS_KEY_ATTR_RECIRC_ID: ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a)); if (!is_exact) { ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); @@ -1386,7 +1409,6 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } break; } - case OVS_KEY_ATTR_UNSPEC: case __OVS_KEY_ATTR_MAX: default: @@ -1618,6 +1640,36 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, } } + { + uint32_t recirc_id; + int n = -1; + + if (ovs_scan(s, "recirc_id(%"SCNi32")%n", &recirc_id, &n)) { + nl_msg_put_u32(key, OVS_KEY_ATTR_RECIRC_ID, recirc_id); + nl_msg_put_u32(mask, OVS_KEY_ATTR_RECIRC_ID, UINT32_MAX); + return n; + } + } + + { + uint32_t dp_hash; + uint32_t dp_hash_mask; + int n = -1; + + if (mask && ovs_scan(s, "dp_hash(%"SCNi32"/%"SCNi32")%n", &dp_hash, + &dp_hash_mask, &n)) { + nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash); + nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, dp_hash_mask); + return n; + } else if (ovs_scan(s, "dp_hash(%"SCNi32")%n", &dp_hash, &n)) { + nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash); + if (mask) { + nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, UINT32_MAX); + } + return n; + } + } 
+ { uint64_t tun_id, tun_id_mask; struct flow_tnl tun_key, tun_key_mask; @@ -2438,6 +2490,14 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); + if (flow->recirc_id) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id); + } + + if (flow->dp_hash) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash); + } + /* Add an ingress port attribute if this is a mask or 'odp_in_port' * is not the magical value "ODPP_NONE". */ if (is_mask || odp_in_port != ODPP_NONE) { @@ -2673,13 +2733,24 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, continue; } - if (type == OVS_KEY_ATTR_PRIORITY) { + switch (type) { + case OVS_KEY_ATTR_RECIRC_ID: + md->recirc_id = nl_attr_get_u32(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_RECIRC_ID); + break; + case OVS_KEY_ATTR_DP_HASH: + md->dp_hash = nl_attr_get_u32(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_DP_HASH); + break; + case OVS_KEY_ATTR_PRIORITY: md->skb_priority = nl_attr_get_u32(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_PRIORITY); - } else if (type == OVS_KEY_ATTR_SKB_MARK) { + break; + case OVS_KEY_ATTR_SKB_MARK: md->pkt_mark = nl_attr_get_u32(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK); - } else if (type == OVS_KEY_ATTR_TUNNEL) { + break; + case OVS_KEY_ATTR_TUNNEL: { enum odp_key_fitness res; res = odp_tun_key_from_attr(nla, &md->tunnel); @@ -2688,9 +2759,14 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, } else if (res == ODP_FIT_PERFECT) { wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL); } - } else if (type == OVS_KEY_ATTR_IN_PORT) { + break; + } + case OVS_KEY_ATTR_IN_PORT: md->in_port.odp_port = nl_attr_get_odp_port(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT); + break; + default: + break; } if (!wanted_attrs) { @@ -3226,6 +3302,18 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, expected_attrs = 0; /* Metadata. 
*/ + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) { + flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID; + } else if (is_mask) { + /* Always exact-match recirc_id when the datapath does not specify it. */ + flow->recirc_id = UINT32_MAX; + } + + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_DP_HASH)) { + flow->dp_hash = nl_attr_get_u32(attrs[OVS_KEY_ATTR_DP_HASH]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_DP_HASH; + } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) { flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]); expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY; diff --git a/lib/packets.h b/lib/packets.h index f6a4f43b6..30e4d13f3 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -33,6 +33,11 @@ struct ds; /* Datapath packet metadata */ struct pkt_metadata { + uint32_t recirc_id; /* Recirculation id carried with the + recirculating packets. 0 for packets + received from the wire. */ + uint32_t dp_hash; /* Hash value computed by the recirculation + action. */ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ uint32_t skb_priority; /* Packet priority for QoS. */ uint32_t pkt_mark; /* Packet mark. 
*/ @@ -40,13 +45,15 @@ struct pkt_metadata { }; #define PKT_METADATA_INITIALIZER(PORT) \ - (struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} } + (struct pkt_metadata){ 0, 0, { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} } static inline struct pkt_metadata pkt_metadata_from_flow(const struct flow *flow) { struct pkt_metadata md; + md.recirc_id = flow->recirc_id; + md.dp_hash = flow->dp_hash; md.tunnel = flow->tunnel; md.skb_priority = flow->skb_priority; md.pkt_mark = flow->pkt_mark; diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 6fbc6726b..93e6ec055 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -135,6 +135,64 @@ void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *); struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t); +/* + * Recirculation + * ============= + * + * Recirculation is a technique to allow a frame to re-enter the packet processing + * path one or more times to achieve more flexible packet processing in the + * data path. Example uses are MPLS handling and selecting the slave port of a bond. + * + * Data path and user space interface + * ----------------------------------- + * + * Two new fields, recirc_id and dp_hash, are added to the current flow data structure. + * They are both of type uint32_t. In addition, a new action, RECIRC, is added. + * + * The value recirc_id is used to distinguish a packet from multiple iterations of + * recirculation. A packet initially received is considered to have a recirc_id of 0. + * Recirc_id is managed by the user space, opaque to the data path. + * + * On the other hand, dp_hash can only be computed by the data path, opaque to + * the user space. In fact, user space may not be able to recompute the hash value. + * The dp_hash value should be wildcarded for a newly received packet. + * The RECIRC action specifies whether the hash is computed and, if so, which + * fields are included in the hash computation. 
The computed hash value is + * stored into the dp_hash field prior to recirculation. + * + * The RECIRC action computes and sets the dp_hash field, sets the recirc_id field, + * and then reprocesses the packet as if it were received on the same input port. + * The RECIRC action works like a function call; actions listed after the RECIRC + * action are executed once it returns. RECIRC actions can be nested; + * the data path implementation limits the number of recirculations executed + * to prevent unreasonable nesting depth or an infinite loop. + * + * Both flow fields and the RECIRC action are exposed as OpenFlow fields via + * Nicira extensions. + * + * Post recirculation flow + * ------------------------ + * + * At the OpenFlow level, post recirculation rules are always hidden from the + * controller. They are installed in table 254 which is set up as a hidden table + * during boot time. Those rules are managed by the local user space program only. + * + * To speed up the classifier look up process, recirc_id is always reflected into + * the metadata field, since recirc_id is required to be exactly matched. + * + * Classifier look up always starts with table 254. A post recirculation flow + * lookup should find its hidden rule within this table. On the other hand, a + * newly received packet should miss all post recirculation rules because its + * recirc_id is zero, and then hit a pre-installed lower-priority rule that redirects + * the classifier to start the lookup from table 0: + * + * * , actions=resubmit(,0) + * + * Post recirculation data path flows are managed like other data path flows. + * They are created on demand. Miss handling, stats collection and revalidation + * work the same way as regular flows. + */ + uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto); void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id); #endif /* ofproto-dpif.h */ -- 2.43.0