datapath: Fix tunnel source port selection for mega flow
[sliver-openvswitch.git] / datapath / tunnel.c
index 87f212a..ef46a69 100644 (file)
 #include "vlan.h"
 #include "vport.h"
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
-#define rt_dst(rt) (rt->dst)
-#else
-#define rt_dst(rt) (rt->u.dst)
-#endif
-
 /**
  *     ovs_tnl_rcv - ingress point for generic tunnel code
  *
  * - skb->csum does not include the inner Ethernet header.
  * - The layer pointers are undefined.
  */
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
+                struct ovs_key_ipv4_tunnel *tun_key)
 {
        struct ethhdr *eh;
 
        skb_reset_mac_header(skb);
        eh = eth_hdr(skb);
 
-       if (likely(ntohs(eh->h_proto) >= 1536))
+       if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
                skb->protocol = eh->h_proto;
        else
                skb->protocol = htons(ETH_P_802_2);
@@ -81,12 +76,12 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
                return;
        }
 
-       ovs_vport_receive(vport, skb);
+       ovs_vport_receive(vport, skb, tun_key);
 }
 
-static struct rtable *find_route(struct net *net,
-               __be32 *saddr, __be32 daddr, u8 ipproto,
-               u8 tos, u32 skb_mark)
+struct rtable *find_route(struct net *net,
+                         __be32 *saddr, __be32 daddr, u8 ipproto,
+                         u8 tos, u32 skb_mark)
 {
        struct rtable *rt;
        /* Tunnel configuration keeps DSCP part of TOS bits, But Linux
@@ -141,27 +136,10 @@ static bool need_linearize(const struct sk_buff *skb)
        return false;
 }
 
-static struct sk_buff *handle_offloads(struct sk_buff *skb,
-                                      const struct rtable *rt,
-                                      int tunnel_hlen)
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
 {
-       int min_headroom;
        int err;
 
-       min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-                       + tunnel_hlen
-                       + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
-
-       if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
-               int head_delta = SKB_DATA_ALIGN(min_headroom -
-                                               skb_headroom(skb) +
-                                               16);
-               err = pskb_expand_head(skb, max_t(int, head_delta, 0),
-                                       0, GFP_ATOMIC);
-               if (unlikely(err))
-                       goto error_free;
-       }
-
        forward_ip_summed(skb, true);
 
        if (skb_is_gso(skb)) {
@@ -169,7 +147,6 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
 
                nskb = __skb_gso_segment(skb, 0, false);
                if (IS_ERR(nskb)) {
-                       kfree_skb(skb);
                        err = PTR_ERR(nskb);
                        goto error;
                }
@@ -185,20 +162,18 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
                if (unlikely(need_linearize(skb))) {
                        err = __skb_linearize(skb);
                        if (unlikely(err))
-                               goto error_free;
+                               goto error;
                }
 
                err = skb_checksum_help(skb);
                if (unlikely(err))
-                       goto error_free;
+                       goto error;
        }
 
        set_ip_summed(skb, OVS_CSUM_NONE);
 
        return skb;
 
-error_free:
-       kfree_skb(skb);
 error:
        return ERR_PTR(err);
 }
@@ -211,45 +186,65 @@ u16 ovs_tnl_get_src_port(struct sk_buff *skb)
        int low;
        int high;
        unsigned int range;
-       u32 hash = OVS_CB(skb)->flow->hash;
+       struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;
+       u32 hash = jhash2((const u32 *)pkt_key,
+                         sizeof(*pkt_key) / sizeof(u32), 0);
 
        inet_get_local_port_range(&low, &high);
        range = (high - low) + 1;
        return (((u64) hash * range) >> 32) + low;
 }
 
-int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
+int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
+                u8 ipproto, int tunnel_hlen,
+                void (*build_header)(const struct vport *,
+                                     struct sk_buff *,
+                                     int tunnel_hlen))
 {
-       struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-       enum vport_err_type err = VPORT_E_TX_ERROR;
+       int min_headroom;
        struct rtable *rt;
        __be32 saddr;
        int sent_len = 0;
-       int tunnel_hlen;
-
-       if (unlikely(!OVS_CB(skb)->tun_key))
-               goto error_free;
+       int err;
+       struct sk_buff *nskb;
 
        /* Route lookup */
        saddr = OVS_CB(skb)->tun_key->ipv4_src;
        rt = find_route(ovs_dp_get_net(vport->dp),
                        &saddr,
                        OVS_CB(skb)->tun_key->ipv4_dst,
-                       tnl_vport->tnl_ops->ipproto,
+                       ipproto,
                        OVS_CB(skb)->tun_key->ipv4_tos,
                        skb_get_mark(skb));
-       if (IS_ERR(rt))
-               goto error_free;
+       if (IS_ERR(rt)) {
+               err = PTR_ERR(rt);
+               goto error;
+       }
 
-       /* Offloading */
-       tunnel_hlen = tnl_vport->tnl_ops->hdr_len(OVS_CB(skb)->tun_key);
        tunnel_hlen += sizeof(struct iphdr);
 
-       skb = handle_offloads(skb, rt, tunnel_hlen);
-       if (IS_ERR(skb)) {
-               skb = NULL;
+       min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+                       + tunnel_hlen
+                       + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+       if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+               int head_delta = SKB_DATA_ALIGN(min_headroom -
+                                               skb_headroom(skb) +
+                                               16);
+
+               err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+                                       0, GFP_ATOMIC);
+               if (unlikely(err))
+                       goto err_free_rt;
+       }
+
+       /* Offloading */
+       nskb = handle_offloads(skb);
+       if (IS_ERR(nskb)) {
+               err = PTR_ERR(nskb);
                goto err_free_rt;
        }
+       skb = nskb;
 
        /* Reset SKB */
        nf_reset(skb);
@@ -278,20 +273,28 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
                        skb_dst_set(skb, &rt_dst(rt));
 
                /* Push Tunnel header. */
-               tnl_vport->tnl_ops->build_header(vport, skb, tunnel_hlen);
+               build_header(vport, skb, tunnel_hlen);
 
                /* Push IP header. */
                iph = ip_hdr(skb);
                iph->version    = 4;
                iph->ihl        = sizeof(struct iphdr) >> 2;
-               iph->protocol   = tnl_vport->tnl_ops->ipproto;
+               iph->protocol   = ipproto;
                iph->daddr      = OVS_CB(skb)->tun_key->ipv4_dst;
                iph->saddr      = saddr;
                iph->tos        = OVS_CB(skb)->tun_key->ipv4_tos;
                iph->ttl        = OVS_CB(skb)->tun_key->ipv4_ttl;
                iph->frag_off   = OVS_CB(skb)->tun_key->tun_flags &
-                                 OVS_TNL_F_DONT_FRAGMENT ?  htons(IP_DF) : 0;
-               ip_select_ident(iph, &rt_dst(rt), NULL);
+                                 TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+               /*
+                * Allow our local IP stack to fragment the outer packet even
+                * if the DF bit is set as a last resort.  We also need to
+                * force selection of an IP ID here with __ip_select_ident(),
+                * as ip_select_ident() assumes a proper ID is not needed when
+                * when the DF bit is set.
+                */
+               skb->local_df = 1;
+               __ip_select_ident(iph, skb_dst(skb), 0);
 
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
@@ -305,61 +308,10 @@ next:
                skb = next_skb;
        }
 
-       if (unlikely(sent_len == 0))
-               ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-
        return sent_len;
 
 err_free_rt:
        ip_rt_put(rt);
-error_free:
-       kfree_skb(skb);
-       ovs_vport_record_error(vport, err);
-       return sent_len;
-}
-
-struct vport *ovs_tnl_create(const struct vport_parms *parms,
-                            const struct vport_ops *vport_ops,
-                            const struct tnl_ops *tnl_ops)
-{
-       struct vport *vport;
-       struct tnl_vport *tnl_vport;
-       int err;
-
-       vport = ovs_vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
-       if (IS_ERR(vport)) {
-               err = PTR_ERR(vport);
-               goto error;
-       }
-
-       tnl_vport = tnl_vport_priv(vport);
-
-       strcpy(tnl_vport->name, parms->name);
-       tnl_vport->tnl_ops = tnl_ops;
-
-       return vport;
-
 error:
-       return ERR_PTR(err);
-}
-
-static void free_port_rcu(struct rcu_head *rcu)
-{
-       struct tnl_vport *tnl_vport = container_of(rcu,
-                                                  struct tnl_vport, rcu);
-
-       ovs_vport_free(vport_from_priv(tnl_vport));
-}
-
-void ovs_tnl_destroy(struct vport *vport)
-{
-       struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-
-       call_rcu(&tnl_vport->rcu, free_port_rcu);
-}
-
-const char *ovs_tnl_get_name(const struct vport *vport)
-{
-       const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-       return tnl_vport->name;
+       return err;
 }