From: Kyle Mestery Date: Sat, 20 Oct 2012 19:15:24 +0000 (-0700) Subject: datapath: Add support for tun_key to Open vSwitch datapath X-Git-Tag: sliver-openvswitch-1.9.90-1~3^2~26 X-Git-Url: http://git.onelab.eu/?p=sliver-openvswitch.git;a=commitdiff_plain;h=356af50bc2a81305002feb94f04fd0dea9e9eb8f datapath: Add support for tun_key to Open vSwitch datapath This is a first pass at providing a tun_key which can be used as the basis for flow-based tunneling. The tun_key includes and replaces the tun_id in both struct ovs_skb_cb and struct sw_flow_key. This patch allows all existing tun_id behaviour to still work. Existing users of tun_id are redirected to tun_key->tun_id to retain compatibility. However, when the userspace code is updated to make use of the new tun_key, the old behaviour will be deprecated and removed. NOTE: With these changes, the tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Signed-off-by: Kyle Mestery Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Ben Pfaff --- diff --git a/NEWS b/NEWS index e00deaed4..f5d7f9e63 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ post-v1.8.0 ------------------------ + - The tunneling code no longer assumes input and output keys are symmetric. + If they are not, PMTUD needs to be disabled for tunneling to work. Note + this only applies to flow-based keys. - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. 
diff --git a/datapath/actions.c b/datapath/actions.c index ec9b595c2..972f7a214 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -37,7 +37,8 @@ #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb); + const struct nlattr *attr, int len, + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb); static int make_writable(struct sk_buff *skb, int write_len) { @@ -308,7 +309,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, } static int sample(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr) + const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key) { const struct nlattr *acts_list = NULL; const struct nlattr *a; @@ -329,11 +331,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb, } return do_execute_actions(dp, skb, nla_data(acts_list), - nla_len(acts_list), true); + nla_len(acts_list), tun_key, true); } static int execute_set_action(struct sk_buff *skb, - const struct nlattr *nested_attr) + const struct nlattr *nested_attr, + struct ovs_key_ipv4_tunnel *tun_key) { int err = 0; @@ -343,7 +346,22 @@ static int execute_set_action(struct sk_buff *skb, break; case OVS_KEY_ATTR_TUN_ID: - OVS_CB(skb)->tun_id = nla_get_be64(nested_attr); + if (!OVS_CB(skb)->tun_key) { + /* If tun_key is NULL for this skb, assign it to + * a value the caller passed in for action processing + * and output. This can disappear once we drop support + * for setting tun_id outside of tun_key. 
+ */ + memset(tun_key, 0, sizeof(struct ovs_key_ipv4_tunnel)); + OVS_CB(skb)->tun_key = tun_key; + } + + OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr); + OVS_CB(skb)->tun_key->tun_flags |= OVS_FLOW_TNL_F_KEY; + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); break; case OVS_KEY_ATTR_ETHERNET: @@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb, /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb) + const struct nlattr *attr, int len, + struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb) { /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and @@ -407,11 +426,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_SET: - err = execute_set_action(skb, nla_data(a)); + err = execute_set_action(skb, nla_data(a), tun_key); break; case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, a); + err = sample(dp, skb, a, tun_key); break; } @@ -458,6 +477,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); struct loop_counter *loop; int error; + struct ovs_key_ipv4_tunnel tun_key; /* Check whether we've looped too much. */ loop = &__get_cpu_var(loop_counters); @@ -469,9 +489,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) goto out_loop; } - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key = NULL; error = do_execute_actions(dp, skb, acts->actions, - acts->actions_len, false); + acts->actions_len, &tun_key, false); /* Check whether sub-actions looped too much. 
*/ if (unlikely(loop->looping)) diff --git a/datapath/datapath.c b/datapath/datapath.c index a6915fb2b..3f963be26 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -587,12 +587,19 @@ static int validate_set(const struct nlattr *a, switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv4_tunnel *tun_key; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + tun_key = nla_data(ovs_key); + if (!tun_key->ipv4_dst) + return -EINVAL; + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -785,7 +792,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, &flow->key.phy.in_port, - &flow->key.phy.tun_id, + &flow->key.tun.tun_key, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_put; diff --git a/datapath/datapath.h b/datapath/datapath.h index affbf0e03..c5df12d6a 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -96,7 +96,8 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. - * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. 
* @csum_start: Stores the offset from which to start checksumming independent @@ -107,7 +108,7 @@ struct datapath { */ struct ovs_skb_cb { struct sw_flow *flow; - __be64 tun_id; + struct ovs_key_ipv4_tunnel *tun_key; #ifdef NEED_CSUM_NORMALIZE enum csum_type ip_summed; u16 csum_start; diff --git a/datapath/flow.c b/datapath/flow.c index d07337c83..42aff6d87 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; - key->phy.tun_id = OVS_CB(skb)->tun_id; + if (OVS_CB(skb)->tun_key) + memcpy(&key->tun.tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun.tun_key)); key->phy.in_port = in_port; skb_reset_mac_header(skb); @@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { /* Not upstream. */ [OVS_KEY_ATTR_TUN_ID] = sizeof(__be64), + [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel), }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -1022,9 +1024,39 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->phy.in_port = DP_MAX_PORTS; } - if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { - swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) && + attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { + struct ovs_key_ipv4_tunnel *tun_key; + __be64 tun_id; + + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); + + if (!tun_key->ipv4_dst) + return -EINVAL; + if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) + return -EINVAL; + + tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + if (tun_id != tun_key->tun_id) + return -EINVAL; + + memcpy(&swkey->tun.tun_key, tun_key, sizeof(swkey->tun.tun_key)); + attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); + } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { + swkey->tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]); + 
swkey->tun.tun_key.tun_flags |= OVS_FLOW_TNL_F_KEY; + attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID); + } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) { + struct ovs_key_ipv4_tunnel *tun_key; + tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]); + + if (!tun_key->ipv4_dst) + return -EINVAL; + + memcpy(&swkey->tun.tun_key, tun_key, sizeof(swkey->tun.tun_key)); + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL); } /* Data attributes. */ @@ -1162,14 +1194,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct ovs_key_ipv4_tunnel *tun_key, const struct nlattr *attr) { const struct nlattr *nla; int rem; + __be64 tun_id; *in_port = DP_MAX_PORTS; - *tun_id = 0; + memset(tun_key, 0, sizeof(*tun_key)); *priority = 0; nla_for_each_nested(nla, attr, rem) { @@ -1185,7 +1219,35 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, break; case OVS_KEY_ATTR_TUN_ID: - *tun_id = nla_get_be64(nla); + tun_id = nla_get_be64(nla); + + if (tun_key->ipv4_dst) { + if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) + return -EINVAL; + if (tun_key->tun_id != tun_id) + return -EINVAL; + break; + } + tun_key->tun_id = tun_id; + tun_key->tun_flags |= OVS_FLOW_TNL_F_KEY; + + break; + + case OVS_KEY_ATTR_IPV4_TUNNEL: + if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY) { + tun_id = tun_key->tun_id; + + memcpy(tun_key, nla_data(nla), sizeof(*tun_key)); + if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) + return -EINVAL; + + if (tun_key->tun_id != tun_id) + return -EINVAL; + } else + memcpy(tun_key, nla_data(nla), sizeof(*tun_key)); + + if (!tun_key->ipv4_dst) + return -EINVAL; break; case OVS_KEY_ATTR_IN_PORT: @@ -1210,8 +1272,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff 
*skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; - if (swkey->phy.tun_id != cpu_to_be64(0) && - nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id)) + if (swkey->tun.tun_key.ipv4_dst) { + struct ovs_key_ipv4_tunnel *tun_key; + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, sizeof(*tun_key)); + if (!nla) + goto nla_put_failure; + tun_key = nla_data(nla); + memcpy(tun_key, &swkey->tun.tun_key, sizeof(*tun_key)); + } + if ((swkey->tun.tun_key.tun_flags & OVS_FLOW_TNL_F_KEY) && + nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id)) goto nla_put_failure; if (swkey->phy.in_port != DP_MAX_PORTS && diff --git a/datapath/flow.h b/datapath/flow.h index 02c563a36..c52e029f5 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -42,10 +42,12 @@ struct sw_flow_actions { struct sw_flow_key { struct { - __be64 tun_id; /* Encapsulating tunnel ID. */ u32 priority; /* Packet QoS priority. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; + struct { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ + } tun; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. 
*/ @@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) @@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 156 + * total 184 */ -#define FLOW_BUFSIZE 156 +#define FLOW_BUFSIZE 184 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + struct ovs_key_ipv4_tunnel *tun_key, const struct nlattr *); #define MAX_ACTIONS_BUFSIZE (16 * 1024) diff --git a/datapath/tunnel.c b/datapath/tunnel.c index d651c1183..020d9d4b9 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, return NULL; } -static void ecn_decapsulate(struct sk_buff *skb, u8 tos) +static void ecn_decapsulate(struct sk_buff *skb) { - if (unlikely(INET_ECN_is_ce(tos))) { + if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) { __be16 protocol = skb->protocol; skb_set_network_header(skb, ETH_HLEN); @@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos) * - skb->csum does not include the inner Ethernet header. * - The layer pointers are undefined. 
*/ -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb) { struct ethhdr *eh; @@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos) skb_clear_rxhash(skb); secpath_reset(skb); - ecn_decapsulate(skb, tos); + ecn_decapsulate(skb); vlan_set_tci(skb, 0); if (unlikely(compute_ip_summed(skb, false))) { @@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb, bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key) + struct sk_buff *skb, unsigned int mtu) { unsigned int eth_hdr_len = ETH_HLEN; unsigned int total_length = 0, header_length = 0, payload_length; @@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport, ipv6_build_icmp(skb, nskb, mtu, payload_length); #endif - /* - * Assume that flow based keys are symmetric with respect to input - * and output and use the key that we were going to put on the - * outgoing packet for the fake received packet. If the keys are - * not symmetric then PMTUD needs to be disabled since we won't have - * any way of synthesizing packets. - */ - if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) == - (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) - OVS_CB(nskb)->tun_id = flow_key; - if (unlikely(compute_ip_summed(nskb, false))) { kfree_skb(nskb); return false; @@ -721,14 +710,26 @@ bool ovs_tnl_frag_needed(struct vport *vport, static bool check_mtu(struct sk_buff *skb, struct vport *vport, const struct tnl_mutable_config *mutable, - const struct rtable *rt, __be16 *frag_offp) + const struct rtable *rt, __be16 *frag_offp, + int tunnel_hlen) { - bool df_inherit = mutable->flags & TNL_F_DF_INHERIT; - bool pmtud = mutable->flags & TNL_F_PMTUD; - __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? 
htons(IP_DF) : 0; + bool df_inherit; + bool pmtud; + __be16 frag_off; int mtu = 0; unsigned int packet_length = skb->len - ETH_HLEN; + if (OVS_CB(skb)->tun_key->ipv4_dst) { + df_inherit = false; + pmtud = false; + frag_off = OVS_CB(skb)->tun_key->tun_flags & OVS_FLOW_TNL_F_DONT_FRAGMENT ? + htons(IP_DF) : 0; + } else { + df_inherit = mutable->flags & TNL_F_DF_INHERIT; + pmtud = mutable->flags & TNL_F_PMTUD; + frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0; + } + /* Allow for one level of tagging in the packet length. */ if (!vlan_tx_tag_present(skb) && eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) @@ -746,7 +747,7 @@ static bool check_mtu(struct sk_buff *skb, mtu = dst_mtu(&rt_dst(rt)) - ETH_HLEN - - mutable->tunnel_hlen + - tunnel_hlen - vlan_header; } @@ -760,8 +761,7 @@ static bool check_mtu(struct sk_buff *skb, mtu = max(mtu, IP_MIN_MTU); if (packet_length > mtu && - ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) return false; } } @@ -777,8 +777,7 @@ static bool check_mtu(struct sk_buff *skb, mtu = max(mtu, IPV6_MIN_MTU); if (packet_length > mtu && - ovs_tnl_frag_needed(vport, mutable, skb, mtu, - OVS_CB(skb)->tun_id)) + ovs_tnl_frag_needed(vport, mutable, skb, mtu)) return false; } } @@ -790,6 +789,7 @@ static bool check_mtu(struct sk_buff *skb, static void create_tunnel_header(const struct vport *vport, const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key, const struct rtable *rt, void *header) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); @@ -806,7 +806,7 @@ static void create_tunnel_header(const struct vport *vport, if (!iph->ttl) iph->ttl = ip4_dst_hoplimit(&rt_dst(rt)); - tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1); + tnl_vport->tnl_ops->build_header(vport, mutable, tun_key, iph + 1); } static void *get_cached_header(const struct tnl_cache *cache) @@ -907,14 +907,22 @@ static struct tnl_cache *build_cache(struct 
vport *vport, struct rtable *rt) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + static const struct ovs_key_ipv4_tunnel tun_key; struct tnl_cache *cache; void *cache_data; int cache_len; struct hh_cache *hh; + int tunnel_hlen; if (!(mutable->flags & TNL_F_HDR_CACHE)) return NULL; + tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, &tun_key); + if (tunnel_hlen < 0) + return NULL; + + tunnel_hlen += sizeof(struct iphdr); + /* * If there is no entry in the ARP cache or if this device does not * support hard header caching just fall back to the IP stack. @@ -937,7 +945,7 @@ static struct tnl_cache *build_cache(struct vport *vport, else cache = NULL; - cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + mutable->tunnel_hlen; + cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + tunnel_hlen; cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) + cache_len, GFP_ATOMIC); @@ -946,9 +954,9 @@ static struct tnl_cache *build_cache(struct vport *vport, create_eth_hdr(cache, hh); cache_data = get_cached_header(cache) + cache->hh_len; - cache->len = cache->hh_len + mutable->tunnel_hlen; + cache->len = cache->hh_len + tunnel_hlen; - create_tunnel_header(vport, mutable, rt, cache_data); + create_tunnel_header(vport, mutable, &tun_key, rt, cache_data); cache->mutable_seq = mutable->seq; cache->rt = rt; @@ -1000,15 +1008,16 @@ unlock: } static struct rtable *__find_route(const struct tnl_mutable_config *mutable, - u8 ipproto, u8 tos) + __be32 saddr, __be32 daddr, u8 ipproto, + u8 tos) { /* Tunnel configuration keeps DSCP part of TOS bits, But Linux * router expect RT_TOS bits only. 
*/ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) struct flowi fl = { .nl_u = { .ip4_u = { - .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, + .daddr = daddr, + .saddr = saddr, .tos = RT_TOS(tos) } }, .proto = ipproto }; struct rtable *rt; @@ -1018,8 +1027,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable, return rt; #else - struct flowi4 fl = { .daddr = mutable->key.daddr, - .saddr = mutable->key.saddr, + struct flowi4 fl = { .daddr = daddr, + .saddr = saddr, .flowi4_tos = RT_TOS(tos), .flowi4_proto = ipproto }; @@ -1029,7 +1038,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable, static struct rtable *find_route(struct vport *vport, const struct tnl_mutable_config *mutable, - u8 tos, struct tnl_cache **cache) + __be32 saddr, __be32 daddr, u8 tos, + struct tnl_cache **cache) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache); @@ -1037,17 +1047,17 @@ static struct rtable *find_route(struct vport *vport, *cache = NULL; tos = RT_TOS(tos); - if (likely(tos == RT_TOS(mutable->tos) && - check_cache_valid(cur_cache, mutable))) { + if (tos == RT_TOS(mutable->tos) && + check_cache_valid(cur_cache, mutable)) { *cache = cur_cache; return cur_cache->rt; } else { struct rtable *rt; - rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos); + rt = __find_route(mutable, saddr, daddr, + tnl_vport->tnl_ops->ipproto, tos); if (IS_ERR(rt)) return NULL; - if (likely(tos == RT_TOS(mutable->tos))) *cache = build_cache(vport, mutable, rt); @@ -1076,13 +1086,14 @@ static bool need_linearize(const struct sk_buff *skb) static struct sk_buff *handle_offloads(struct sk_buff *skb, const struct tnl_mutable_config *mutable, - const struct rtable *rt) + const struct rtable *rt, + int tunnel_hlen) { int min_headroom; int err; min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len - + mutable->tunnel_hlen + + tunnel_hlen + 
(vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { @@ -1137,14 +1148,14 @@ error: } static int send_frags(struct sk_buff *skb, - const struct tnl_mutable_config *mutable) + int tunnel_hlen) { int sent_len; sent_len = 0; while (skb) { struct sk_buff *next = skb->next; - int frag_len = skb->len - mutable->tunnel_hlen; + int frag_len = skb->len - tunnel_hlen; int err; skb->next = NULL; @@ -1173,15 +1184,17 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable); - enum vport_err_type err = VPORT_E_TX_ERROR; struct rtable *rt; struct dst_entry *unattached_dst = NULL; struct tnl_cache *cache; + struct ovs_key_ipv4_tunnel tun_key; int sent_len = 0; + int tunnel_hlen; __be16 frag_off = 0; + __be32 daddr; + __be32 saddr; u8 ttl; - u8 inner_tos; u8 tos; /* Validate the protocol headers before we try to use them. */ @@ -1207,30 +1220,68 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) } #endif - /* ToS */ - if (skb->protocol == htons(ETH_P_IP)) - inner_tos = ip_hdr(skb)->tos; + /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here, + * and zero it out. 
+ */ + if (!OVS_CB(skb)->tun_key) { + memset(&tun_key, 0, sizeof(tun_key)); + OVS_CB(skb)->tun_key = &tun_key; + } + + tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, OVS_CB(skb)->tun_key); + if (unlikely(tunnel_hlen < 0)) { + err = VPORT_E_TX_DROPPED; + goto error_free; + } + tunnel_hlen += sizeof(struct iphdr); + + if (OVS_CB(skb)->tun_key->ipv4_dst) { + daddr = OVS_CB(skb)->tun_key->ipv4_dst; + saddr = OVS_CB(skb)->tun_key->ipv4_src; + tos = OVS_CB(skb)->tun_key->ipv4_tos; + ttl = OVS_CB(skb)->tun_key->ipv4_ttl; + } else { + u8 inner_tos; + daddr = mutable->key.daddr; + saddr = mutable->key.saddr; + + /* ToS */ + if (skb->protocol == htons(ETH_P_IP)) + inner_tos = ip_hdr(skb)->tos; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) - inner_tos = ipv6_get_dsfield(ipv6_hdr(skb)); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_tos = ipv6_get_dsfield(ipv6_hdr(skb)); #endif - else - inner_tos = 0; + else + inner_tos = 0; - if (mutable->flags & TNL_F_TOS_INHERIT) - tos = inner_tos; - else - tos = mutable->tos; + if (mutable->flags & TNL_F_TOS_INHERIT) + tos = inner_tos; + else + tos = mutable->tos; + + tos = INET_ECN_encapsulate(tos, inner_tos); + + /* TTL */ + ttl = mutable->ttl; + if (mutable->flags & TNL_F_TTL_INHERIT) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = ip_hdr(skb)->ttl; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ipv6_hdr(skb)->hop_limit; +#endif + } + + } /* Route lookup */ - rt = find_route(vport, mutable, tos, &cache); + rt = find_route(vport, mutable, saddr, daddr, tos, &cache); if (unlikely(!rt)) goto error_free; if (unlikely(!cache)) unattached_dst = &rt_dst(rt); - tos = INET_ECN_encapsulate(tos, inner_tos); - /* Reset SKB */ nf_reset(skb); secpath_reset(skb); @@ -1238,12 +1289,12 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) skb_clear_rxhash(skb); /* Offloading */ - skb = handle_offloads(skb, 
mutable, rt); + skb = handle_offloads(skb, mutable, rt, tunnel_hlen); if (IS_ERR(skb)) goto error; /* MTU */ - if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) { + if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) { err = VPORT_E_TX_DROPPED; goto error_free; } @@ -1252,25 +1303,19 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) * If we are over the MTU, allow the IP stack to handle fragmentation. * Fragmentation is a slow path anyways. */ - if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) && + if (unlikely(skb->len + tunnel_hlen > dst_mtu(&rt_dst(rt)) && cache)) { unattached_dst = &rt_dst(rt); dst_hold(unattached_dst); cache = NULL; } - /* TTL */ - ttl = mutable->ttl; - if (!ttl) - ttl = ip4_dst_hoplimit(&rt_dst(rt)); - - if (mutable->flags & TNL_F_TTL_INHERIT) { - if (skb->protocol == htons(ETH_P_IP)) - ttl = ip_hdr(skb)->ttl; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) - ttl = ipv6_hdr(skb)->hop_limit; -#endif + /* TTL Fixup. 
*/ + if (!OVS_CB(skb)->tun_key->ipv4_dst) { + if (!(mutable->flags & TNL_F_TTL_INHERIT)) { + if (!ttl) + ttl = ip4_dst_hoplimit(&rt_dst(rt)); + } } while (skb) { @@ -1288,8 +1333,8 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) skb_set_network_header(skb, cache->hh_len); } else { - skb_push(skb, mutable->tunnel_hlen); - create_tunnel_header(vport, mutable, rt, skb->data); + skb_push(skb, tunnel_hlen); + create_tunnel_header(vport, mutable, OVS_CB(skb)->tun_key, rt, skb->data); skb_reset_network_header(skb); if (next_skb) @@ -1308,7 +1353,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) ip_select_ident(iph, &rt_dst(rt), NULL); skb = tnl_vport->tnl_ops->update_header(vport, mutable, - &rt_dst(rt), skb); + &rt_dst(rt), skb, tunnel_hlen); if (unlikely(!skb)) goto next; @@ -1341,7 +1386,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb) sent_len += orig_len; } } else - sent_len += send_frags(skb, mutable); + sent_len += send_frags(skb, tunnel_hlen); next: skb = next_skb; @@ -1427,12 +1472,6 @@ static int tnl_set_config(struct net *net, struct nlattr *options, else mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]); - mutable->tunnel_hlen = tnl_ops->hdr_len(mutable); - if (mutable->tunnel_hlen < 0) - return mutable->tunnel_hlen; - - mutable->tunnel_hlen += sizeof(struct iphdr); - old_vport = port_table_lookup(&mutable->key, &old_mutable); if (old_vport && old_vport != cur_vport) return -EEXIST; @@ -1442,7 +1481,8 @@ static int tnl_set_config(struct net *net, struct nlattr *options, struct net_device *dev; struct rtable *rt; - rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos); + rt = __find_route(mutable, mutable->key.saddr, mutable->key.daddr, + tnl_ops->ipproto, mutable->tos); if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt_dst(rt).dev; diff --git a/datapath/tunnel.h b/datapath/tunnel.h index d2a87f27d..951a6f1f7 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -109,8 +109,6 @@ struct 
tnl_mutable_config { unsigned seq; - unsigned tunnel_hlen; - unsigned char eth_addr[ETH_ALEN]; /* Configured via OVS_TUNNEL_ATTR_* attributes. */ @@ -132,7 +130,8 @@ struct tnl_ops { * build_header() (i.e. excludes the IP header). Returns a negative * error code if the configuration is invalid. */ - int (*hdr_len)(const struct tnl_mutable_config *); + int (*hdr_len)(const struct tnl_mutable_config *, + const struct ovs_key_ipv4_tunnel *); /* * Builds the static portion of the tunnel header, which is stored in @@ -143,7 +142,8 @@ struct tnl_ops { * called for every packet, so try not to make it too slow. */ void (*build_header)(const struct vport *, - const struct tnl_mutable_config *, void *header); + const struct tnl_mutable_config *, + const struct ovs_key_ipv4_tunnel *, void *header); /* * Updates the cached header of a packet to match the actual packet @@ -155,7 +155,8 @@ struct tnl_ops { */ struct sk_buff *(*update_header)(const struct vport *, const struct tnl_mutable_config *, - struct dst_entry *, struct sk_buff *); + struct dst_entry *, struct sk_buff *, + int tunnel_hlen); }; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) @@ -270,14 +271,14 @@ int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr); const char *ovs_tnl_get_name(const struct vport *vport); const unsigned char *ovs_tnl_get_addr(const struct vport *vport); int ovs_tnl_send(struct vport *vport, struct sk_buff *skb); -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos); +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb); struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, __be64 key, int tunnel_type, const struct tnl_mutable_config **mutable); bool ovs_tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, - struct sk_buff *skb, unsigned int mtu, __be64 flow_key); + struct sk_buff *skb, unsigned int mtu); void ovs_tnl_free_linked_skbs(struct sk_buff *skb); int ovs_tnl_init(void); @@ -287,4 +288,15 @@ static 
inline struct tnl_vport *tnl_vport_priv(const struct vport *vport) return vport_priv(vport); } +static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id, u32 tun_flags) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = tun_flags; +} + #endif /* tunnel.h */ diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index 05a099d63..8a63416b9 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -155,16 +155,52 @@ static struct inet_frags frag_state = { .secret_interval = CAPWAP_FRAG_SECRET_INTERVAL, }; -static int capwap_hdr_len(const struct tnl_mutable_config *mutable) +static int get_capwap_param(const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key, + u32 *flags, __be64 *out_key) +{ + if (tun_key->ipv4_dst) { + *flags = 0; + + if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY) + *flags = TNL_F_OUT_KEY_ACTION; + if (tun_key->tun_flags & OVS_FLOW_TNL_F_CSUM) + *flags |= TNL_F_CSUM; + *out_key = tun_key->tun_id; + } else { + *flags = mutable->flags; + if (mutable->flags & TNL_F_OUT_KEY_ACTION) { + if (likely(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) { + *out_key = tun_key->tun_id; + } else { + *out_key = 0; + return -EINVAL; + } + } else + *out_key = mutable->out_key; + + } + return 0; +} + +static int capwap_hdr_len(const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key) { int size = CAPWAP_MIN_HLEN; + u32 flags; + __be64 out_key; + int err; + + err = get_capwap_param(mutable, tun_key, &flags, &out_key); + if (err) + return err; /* CAPWAP has no checksums. 
*/ - if (mutable->flags & TNL_F_CSUM) + if (flags & TNL_F_CSUM) return -EINVAL; /* if keys are specified, then add WSI field */ - if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) { + if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) { size += sizeof(struct capwaphdr_wsi) + sizeof(struct capwaphdr_wsi_key); } @@ -174,10 +210,15 @@ static int capwap_hdr_len(const struct tnl_mutable_config *mutable) static void capwap_build_header(const struct vport *vport, const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key, void *header) { struct udphdr *udph = header; struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1); + u32 flags; + __be64 out_key; + + get_capwap_param(mutable, tun_key, &flags, &out_key); udph->source = htons(CAPWAP_SRC_PORT); udph->dest = htons(CAPWAP_DST_PORT); @@ -186,7 +227,7 @@ static void capwap_build_header(const struct vport *vport, cwh->frag_id = 0; cwh->frag_off = 0; - if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) { + if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) { struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); cwh->begin = CAPWAP_KEYED; @@ -197,9 +238,9 @@ static void capwap_build_header(const struct vport *vport, wsi->flags = CAPWAP_WSI_F_KEY64; wsi->reserved_padding = 0; - if (mutable->out_key) { + if (out_key) { struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); - opt->key = mutable->out_key; + opt->key = out_key; } } else { /* make packet readable by old capwap code */ @@ -210,30 +251,39 @@ static void capwap_build_header(const struct vport *vport, static struct sk_buff *capwap_update_header(const struct vport *vport, const struct tnl_mutable_config *mutable, struct dst_entry *dst, - struct sk_buff *skb) + struct sk_buff *skb, + int tunnel_hlen) { + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; struct udphdr *udph = udp_hdr(skb); + u32 flags; + __be64 out_key; - if (mutable->flags & TNL_F_OUT_KEY_ACTION) { + if 
(get_capwap_param(mutable, tun_key, &flags, &out_key)) { + kfree_skb(skb); + return NULL; + } + + if (flags & TNL_F_OUT_KEY_ACTION) { /* first field in WSI is key */ struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1); struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); - opt->key = OVS_CB(skb)->tun_id; + opt->key = out_key; } udph->len = htons(skb->len - skb_transport_offset(skb)); if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) { - unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable); + unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key); skb = fragment(skb, vport, dst, hlen); } return skb; } -static int process_capwap_wsi(struct sk_buff *skb, __be64 *key) +static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_present) { struct capwaphdr *cwh = capwap_hdr(skb); struct capwaphdr_wsi *wsi; @@ -270,12 +320,15 @@ static int process_capwap_wsi(struct sk_buff *skb, __be64 *key) opt = (struct capwaphdr_wsi_key *)(wsi + 1); *key = opt->key; + *key_present = true; + } else { + *key_present = false; } return 0; } -static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key) +static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_present) { struct capwaphdr *cwh = capwap_hdr(skb); int hdr_len = sizeof(struct udphdr); @@ -301,7 +354,7 @@ static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key) cwh = capwap_hdr(skb); } - if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key)) + if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_present)) goto error; return skb; @@ -316,12 +369,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) struct vport *vport; const struct tnl_mutable_config *mutable; struct iphdr *iph; + struct ovs_key_ipv4_tunnel tun_key; __be64 key = 0; + bool key_present = false; if 
(unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN))) goto error; - skb = process_capwap_proto(skb, &key); + skb = process_capwap_proto(skb, &key, &key_present); if (unlikely(!skb)) goto out; @@ -333,12 +388,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) goto error; } - if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; - else - OVS_CB(skb)->tun_id = 0; + if (key_present && mutable->key.daddr && + !(mutable->flags & TNL_F_IN_KEY_MATCH)) + key_present = false; + + tnl_tun_key_init(&tun_key, iph, key, key_present ? OVS_FLOW_TNL_F_KEY : 0); + OVS_CB(skb)->tun_key = &tun_key; - ovs_tnl_rcv(vport, skb, iph->tos); + ovs_tnl_rcv(vport, skb); goto out; error: diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index e3a190f50..a25da0263 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -45,22 +45,61 @@ struct gre_base_hdr { __be16 protocol; }; -static int gre_hdr_len(const struct tnl_mutable_config *mutable) +static int get_gre_param(const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key, + u32 *flags, u32 *tunnel_type, __be64 *out_key) +{ + if (tun_key->ipv4_dst) { + *flags = 0; + + if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY) + *flags = TNL_F_OUT_KEY_ACTION; + if (tun_key->tun_flags & OVS_FLOW_TNL_F_CSUM) + *flags |= TNL_F_CSUM; + *tunnel_type = TNL_T_PROTO_GRE; + *out_key = tun_key->tun_id; + } else { + *flags = mutable->flags; + *tunnel_type = mutable->key.tunnel_type; + if (mutable->flags & TNL_F_OUT_KEY_ACTION) { + if (likely(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) { + *out_key = tun_key->tun_id; + } else { + *out_key = 0; + return -EINVAL; + } + } else + *out_key = mutable->out_key; + + } + return 0; +} + +static int gre_hdr_len(const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key) { int len; + u32 flags; + u32 tunnel_type; + __be64 out_key; + int err; + + err = get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key); + if (err) + 
return err; len = GRE_HEADER_SECTION; - if (mutable->flags & TNL_F_CSUM) + if (flags & TNL_F_CSUM) len += GRE_HEADER_SECTION; /* Set key for GRE64 tunnels, even when key if is zero. */ - if (mutable->out_key || - mutable->key.tunnel_type & TNL_T_PROTO_GRE64 || - mutable->flags & TNL_F_OUT_KEY_ACTION) { + if (out_key || + tunnel_type & TNL_T_PROTO_GRE64 || + flags & TNL_F_OUT_KEY_ACTION) { len += GRE_HEADER_SECTION; - if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) + if (tunnel_type & TNL_T_PROTO_GRE64) len += GRE_HEADER_SECTION; } return len; @@ -88,32 +127,38 @@ static __be32 be64_get_high32(__be64 x) static void gre_build_header(const struct vport *vport, const struct tnl_mutable_config *mutable, + const struct ovs_key_ipv4_tunnel *tun_key, void *header) { struct gre_base_hdr *greh = header; __be32 *options = (__be32 *)(greh + 1); + u32 flags; + u32 tunnel_type; + __be64 out_key; + + get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key); greh->protocol = htons(ETH_P_TEB); greh->flags = 0; - if (mutable->flags & TNL_F_CSUM) { + if (flags & TNL_F_CSUM) { greh->flags |= GRE_CSUM; *options = 0; options++; } - if (mutable->flags & TNL_F_OUT_KEY_ACTION) { + if (flags & TNL_F_OUT_KEY_ACTION) { greh->flags |= GRE_KEY; - if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) + if (tunnel_type & TNL_T_PROTO_GRE64) greh->flags |= GRE_SEQ; - } else if (mutable->out_key || - mutable->key.tunnel_type & TNL_T_PROTO_GRE64) { + } else if (out_key || + tunnel_type & TNL_T_PROTO_GRE64) { greh->flags |= GRE_KEY; - *options = be64_get_low32(mutable->out_key); - if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) { + *options = be64_get_low32(out_key); + if (tunnel_type & TNL_T_PROTO_GRE64) { options++; - *options = be64_get_high32(mutable->out_key); + *options = be64_get_high32(out_key); greh->flags |= GRE_SEQ; } } @@ -122,28 +167,37 @@ static void gre_build_header(const struct vport *vport, static struct sk_buff *gre_update_header(const struct vport *vport, const struct 
tnl_mutable_config *mutable, struct dst_entry *dst, - struct sk_buff *skb) + struct sk_buff *skb, + int tunnel_hlen) { - __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen + u32 flags; + u32 tunnel_type; + __be64 out_key; + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen - GRE_HEADER_SECTION); + if (get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key)) { + kfree_skb(skb); + return NULL; + } + /* Work backwards over the options so the checksum is last. */ - if (mutable->flags & TNL_F_OUT_KEY_ACTION) { - if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) { + if (flags & TNL_F_OUT_KEY_ACTION) { + if (tunnel_type & TNL_T_PROTO_GRE64) { /* Set higher 32 bits to seq. */ - *options = be64_get_high32(OVS_CB(skb)->tun_id); + *options = be64_get_high32(out_key); options--; } - *options = be64_get_low32(OVS_CB(skb)->tun_id); + *options = be64_get_low32(out_key); options--; - } else if (mutable->out_key || - mutable->key.tunnel_type & TNL_T_PROTO_GRE64) { + } else if (out_key || tunnel_type & TNL_T_PROTO_GRE64) { options--; - if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) + if (tunnel_type & TNL_T_PROTO_GRE64) options--; } - if (mutable->flags & TNL_F_CSUM) + if (flags & TNL_F_CSUM) *(__sum16 *)options = csum_fold(skb_checksum(skb, skb_transport_offset(skb), skb->len - skb_transport_offset(skb), @@ -335,7 +389,7 @@ static void gre_err(struct sk_buff *skb, u32 info) #endif __skb_pull(skb, tunnel_hdr_len); - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); + ovs_tnl_frag_needed(vport, mutable, skb, mtu); __skb_push(skb, tunnel_hdr_len); out: @@ -370,6 +424,24 @@ static bool check_checksum(struct sk_buff *skb) return (csum == 0); } +static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable, + __be16 gre_flags) +{ + u32 tunnel_flags = 0; + + if (gre_flags & GRE_KEY) { + if (mutable->key.daddr && (mutable->flags & TNL_F_IN_KEY_MATCH)) + 
tunnel_flags = OVS_FLOW_TNL_F_KEY; + else if (!mutable->key.daddr) + tunnel_flags = OVS_FLOW_TNL_F_KEY; + } + + if (gre_flags & GRE_CSUM) + tunnel_flags |= OVS_FLOW_TNL_F_CSUM; + + return tunnel_flags; +} + /* Called with rcu_read_lock and BH disabled. */ static int gre_rcv(struct sk_buff *skb) { @@ -377,6 +449,7 @@ static int gre_rcv(struct sk_buff *skb) const struct tnl_mutable_config *mutable; int hdr_len; struct iphdr *iph; + struct ovs_key_ipv4_tunnel tun_key; __be16 flags; __be64 key; u32 tunnel_type; @@ -401,15 +474,13 @@ static int gre_rcv(struct sk_buff *skb) goto error; } - if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; - else - OVS_CB(skb)->tun_id = 0; + tnl_tun_key_init(&tun_key, iph, key, gre_flags_to_tunnel_flags(mutable, flags)); + OVS_CB(skb)->tun_key = &tun_key; __skb_pull(skb, hdr_len); skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); - ovs_tnl_rcv(vport, skb, iph->tos); + ovs_tnl_rcv(vport, skb); return 0; error: diff --git a/datapath/vport.c b/datapath/vport.c index af1c066fe..d9c8cfd20 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -463,7 +463,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->flow = NULL; if (!(vport->ops->flags & VPORT_F_TUN_ID)) - OVS_CB(skb)->tun_id = 0; + OVS_CB(skb)->tun_key = NULL; ovs_dp_process_received_packet(vport, skb); } diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 294f6d08b..89feb61f6 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -279,7 +279,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ + OVS_KEY_ATTR_IPV4_TUNNEL = 62, /* struct ovs_key_ipv4_tunnel */ + OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ __OVS_KEY_ATTR_MAX }; @@ -361,6 +362,21 @@ struct ovs_key_nd { __u8 nd_tll[6]; }; +/* Values for 
ovs_key_ipv4_tunnel->tun_flags */ +#define OVS_FLOW_TNL_F_DONT_FRAGMENT (1 << 0) +#define OVS_FLOW_TNL_F_CSUM (1 << 1) +#define OVS_FLOW_TNL_F_KEY (1 << 2) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __u32 tun_flags; + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 pad[2]; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c9e3210f1..797cb06b6 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a) case OVS_KEY_ATTR_TUN_ID: case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_IPV6: + case OVS_KEY_ATTR_IPV4_TUNNEL: /* not implemented */ break; diff --git a/lib/odp-util.c b/lib/odp-util.c index 257d7a7ac..9ed17ed89 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_UNSPEC: return "unspec"; case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "priority"; + case OVS_KEY_ATTR_TUN_ID: return "tun_id"; + case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; case OVS_KEY_ATTR_VLAN: return "vlan"; @@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; - case OVS_KEY_ATTR_TUN_ID: return "tun_id"; case __OVS_KEY_ATTR_MAX: default: @@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_ENCAP: return -2; case OVS_KEY_ATTR_PRIORITY: return 4; case OVS_KEY_ATTR_TUN_ID: return 8; + case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel); case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); case OVS_KEY_ATTR_VLAN: 
return sizeof(ovs_be16); @@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) const struct ovs_key_icmpv6 *icmpv6_key; const struct ovs_key_arp *arp_key; const struct ovs_key_nd *nd_key; + const struct ovs_key_ipv4_tunnel *ipv4_tun_key; enum ovs_key_attr attr = nl_attr_type(a); int expected_len; @@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + ipv4_tun_key = nl_attr_get(a); + ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32 + ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")", + ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags, + IP_ARGS(&ipv4_tun_key->ipv4_src), + IP_ARGS(&ipv4_tun_key->ipv4_dst), + ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl); + break; + case OVS_KEY_ATTR_IN_PORT: ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); break; diff --git a/lib/odp-util.h b/lib/odp-util.h index 16f2b1567..57073bad9 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names, * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUN_ID 8 -- 4 12 + * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) @@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names, * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 156 + * total 184 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value.