datapath: Factor out common code from *_build_header() to ovs_tnl_send().
[sliver-openvswitch.git] / datapath / vport-gre.c
index 4000c74..40b96cf 100644 (file)
@@ -32,7 +32,6 @@
 #include "datapath.h"
 #include "tunnel.h"
 #include "vport.h"
-#include "vport-generic.h"
 
 /*
  * The GRE header is composed of a series of sections: a base and then a variable
@@ -45,23 +44,29 @@ struct gre_base_hdr {
        __be16 protocol;
 };
 
-static int gre_hdr_len(const struct tnl_mutable_config *mutable)
+static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
 {
-       int len;
+       int len = GRE_HEADER_SECTION;
 
-       len = GRE_HEADER_SECTION;
-
-       if (mutable->flags & TNL_F_CSUM)
+       if (tun_key->tun_flags & OVS_TNL_F_KEY)
                len += GRE_HEADER_SECTION;
-
-       if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) {
+       if (tun_key->tun_flags & OVS_TNL_F_CSUM)
                len += GRE_HEADER_SECTION;
-               if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
-                       len += GRE_HEADER_SECTION;
-       }
        return len;
 }
 
+/*
+ * Returns the length of the GRE64 header for a packet described by
+ * 'tun_key'.  The base header, key and sequence sections are always
+ * counted, because GRE64 carries a key even when that key is zero.  One
+ * further section is added when OVS_TNL_F_CSUM is set in
+ * tun_key->tun_flags.
+ */
+static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
+{
+       int csum_len = 0;
+
+       if (tun_key->tun_flags & OVS_TNL_F_CSUM)
+               csum_len = GRE_HEADER_SECTION;  /* GRE Csum */
+
+       return GRE_HEADER_SECTION +             /* GRE Hdr */
+              GRE_HEADER_SECTION +             /* GRE Key */
+              GRE_HEADER_SECTION +             /* GRE SEQ */
+              csum_len;
+}
 
 /* Returns the least-significant 32 bits of a __be64. */
 static __be32 be64_get_low32(__be64 x)
@@ -82,76 +87,52 @@ static __be32 be64_get_high32(__be64 x)
 #endif
 }
 
-static void gre_build_header(const struct vport *vport,
-                            const struct tnl_mutable_config *mutable,
-                            void *header)
+static void __gre_build_header(struct sk_buff *skb,
+                              int tunnel_hlen,
+                              bool is_gre64)
 {
-       struct gre_base_hdr *greh = header;
-       __be32 *options = (__be32 *)(greh + 1);
-
+       const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+       __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
+                       - GRE_HEADER_SECTION);
+       struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
        greh->protocol = htons(ETH_P_TEB);
        greh->flags = 0;
 
-       if (mutable->flags & TNL_F_CSUM) {
-               greh->flags |= GRE_CSUM;
-               *options = 0;
-               options++;
-       }
-
-       if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
-               greh->flags |= GRE_KEY;
-               if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
-                       greh->flags |= GRE_SEQ;
-
-       } else if (mutable->out_key) {
-               greh->flags |= GRE_KEY;
-               *options = be64_get_low32(mutable->out_key);
-               if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
-                       options++;
-                       *options = be64_get_high32(mutable->out_key);
-                       greh->flags |= GRE_SEQ;
-               }
-       }
-}
-
-static struct sk_buff *gre_update_header(const struct vport *vport,
-                                        const struct tnl_mutable_config *mutable,
-                                        struct dst_entry *dst,
-                                        struct sk_buff *skb)
-{
-       __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
-                                              - GRE_HEADER_SECTION);
-
        /* Work backwards over the options so the checksum is last. */
-       if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
-               if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+       if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
+               greh->flags |= GRE_KEY;
+               if (is_gre64) {
                        /* Set higher 32 bits to seq. */
-                       *options = be64_get_high32(OVS_CB(skb)->tun_id);
+                       *options = be64_get_high32(tun_key->tun_id);
                        options--;
+                       greh->flags |= GRE_SEQ;
                }
-               *options = be64_get_low32(OVS_CB(skb)->tun_id);
+               *options = be64_get_low32(tun_key->tun_id);
                options--;
-       } else if (mutable->out_key) {
-               options--;
-               if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
-                       options--;
        }
 
-       if (mutable->flags & TNL_F_CSUM)
+       if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
+               greh->flags |= GRE_CSUM;
+               *options = 0;
                *(__sum16 *)options = csum_fold(skb_checksum(skb,
                                                skb_transport_offset(skb),
                                                skb->len - skb_transport_offset(skb),
                                                0));
-       /*
-        * Allow our local IP stack to fragment the outer packet even if the
-        * DF bit is set as a last resort.  We also need to force selection of
-        * an IP ID here because Linux will otherwise leave it at 0 if the
-        * packet originally had DF set.
-        */
-       skb->local_df = 1;
-       __ip_select_ident(ip_hdr(skb), dst, 0);
-
-       return skb;
+       }
+}
+
+/*
+ * tnl_ops build_header callback for plain GRE ports.  'vport' is not used;
+ * the work is done by __gre_build_header() with is_gre64 set to false.
+ */
+static void gre_build_header(const struct vport *vport, struct sk_buff *skb,
+                            int tunnel_hlen)
+{
+       const bool is_gre64 = false;
+
+       __gre_build_header(skb, tunnel_hlen, is_gre64);
+}
+
+/*
+ * tnl_ops build_header callback for GRE64 ports.  'vport' is not used;
+ * the work is done by __gre_build_header() with is_gre64 set to true.
+ */
+static void gre64_build_header(const struct vport *vport, struct sk_buff *skb,
+                              int tunnel_hlen)
+{
+       const bool is_gre64 = true;
+
+       __gre_build_header(skb, tunnel_hlen, is_gre64);
 }
 
 static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
@@ -164,7 +145,7 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
 }
 
 static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
-                       u32 *tunnel_type)
+                       bool *is_gre64)
 {
        /* IP and ICMP protocol handlers check that the IHL is valid. */
        struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
@@ -196,16 +177,16 @@ static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
 
                if (greh->flags & GRE_SEQ) {
                        seq = *options;
-                       *tunnel_type = TNL_T_PROTO_GRE64;
+                       *is_gre64 = true;
                } else {
                        seq = 0;
-                       *tunnel_type = TNL_T_PROTO_GRE;
+                       *is_gre64 = false;
                }
                *tun_id = key_to_tunnel_id(gre_key, seq);
        } else {
                *tun_id = 0;
                /* Ignore GRE seq if there is no key present. */
-               *tunnel_type = TNL_T_PROTO_GRE;
+               *is_gre64 = false;
        }
 
        if (greh->flags & GRE_SEQ)
@@ -214,130 +195,6 @@ static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
        return hdr_len;
 }
 
-/* Called with rcu_read_lock and BH disabled. */
-static void gre_err(struct sk_buff *skb, u32 info)
-{
-       struct vport *vport;
-       const struct tnl_mutable_config *mutable;
-       const int type = icmp_hdr(skb)->type;
-       const int code = icmp_hdr(skb)->code;
-       int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
-       u32 tunnel_type;
-
-       struct iphdr *iph;
-       __be16 flags;
-       __be64 key;
-       int tunnel_hdr_len, tot_hdr_len;
-       unsigned int orig_mac_header;
-       unsigned int orig_nw_header;
-
-       if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED)
-               return;
-
-       /*
-        * The mimimum size packet that we would actually be able to process:
-        * encapsulating IP header, minimum GRE header, Ethernet header,
-        * inner IPv4 header.
-        */
-       if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION +
-                               ETH_HLEN + sizeof(struct iphdr)))
-               return;
-
-       iph = (struct iphdr *)skb->data;
-       if (ipv4_is_multicast(iph->daddr))
-               return;
-
-       tunnel_hdr_len = parse_header(iph, &flags, &key, &tunnel_type);
-       if (tunnel_hdr_len < 0)
-               return;
-
-       vport = ovs_tnl_find_port(dev_net(skb->dev), iph->saddr, iph->daddr, key,
-                                 tunnel_type, &mutable);
-       if (!vport)
-               return;
-
-       /*
-        * Packets received by this function were previously sent by us, so
-        * any comparisons should be to the output values, not the input.
-        * However, it's not really worth it to have a hash table based on
-        * output keys (especially since ICMP error handling of tunneled packets
-        * isn't that reliable anyways).  Therefore, we do a lookup based on the
-        * out key as if it were the in key and then check to see if the input
-        * and output keys are the same.
-        */
-       if (mutable->key.in_key != mutable->out_key)
-               return;
-
-       if (!!(mutable->flags & TNL_F_IN_KEY_MATCH) !=
-           !!(mutable->flags & TNL_F_OUT_KEY_ACTION))
-               return;
-
-       if ((mutable->flags & TNL_F_CSUM) && !(flags & GRE_CSUM))
-               return;
-
-       tunnel_hdr_len += iph->ihl << 2;
-
-       orig_mac_header = skb_mac_header(skb) - skb->data;
-       orig_nw_header = skb_network_header(skb) - skb->data;
-       skb_set_mac_header(skb, tunnel_hdr_len);
-
-       tot_hdr_len = tunnel_hdr_len + ETH_HLEN;
-
-       skb->protocol = eth_hdr(skb)->h_proto;
-       if (skb->protocol == htons(ETH_P_8021Q)) {
-               tot_hdr_len += VLAN_HLEN;
-               skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-       }
-
-       skb_set_network_header(skb, tot_hdr_len);
-       mtu -= tot_hdr_len;
-
-       if (skb->protocol == htons(ETH_P_IP))
-               tot_hdr_len += sizeof(struct iphdr);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-       else if (skb->protocol == htons(ETH_P_IPV6))
-               tot_hdr_len += sizeof(struct ipv6hdr);
-#endif
-       else
-               goto out;
-
-       if (!pskb_may_pull(skb, tot_hdr_len))
-               goto out;
-
-       if (skb->protocol == htons(ETH_P_IP)) {
-               if (mtu < IP_MIN_MTU) {
-                       if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
-                               mtu = IP_MIN_MTU;
-                       else
-                               goto out;
-               }
-
-       }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-       else if (skb->protocol == htons(ETH_P_IPV6)) {
-               if (mtu < IPV6_MIN_MTU) {
-                       unsigned int packet_length = sizeof(struct ipv6hdr) +
-                                             ntohs(ipv6_hdr(skb)->payload_len);
-
-                       if (packet_length >= IPV6_MIN_MTU
-                           || ntohs(ipv6_hdr(skb)->payload_len) == 0)
-                               mtu = IPV6_MIN_MTU;
-                       else
-                               goto out;
-               }
-       }
-#endif
-
-       __skb_pull(skb, tunnel_hdr_len);
-       ovs_tnl_frag_needed(vport, mutable, skb, mtu, key);
-       __skb_push(skb, tunnel_hdr_len);
-
-out:
-       skb_set_mac_header(skb, orig_mac_header);
-       skb_set_network_header(skb, orig_nw_header);
-       skb->protocol = htons(ETH_P_IP);
-}
-
 static bool check_checksum(struct sk_buff *skb)
 {
        struct iphdr *iph = ip_hdr(skb);
@@ -364,46 +221,61 @@ static bool check_checksum(struct sk_buff *skb)
        return (csum == 0);
 }
 
+/*
+ * Translates the flags of a received GRE header into OVS_TNL_F_* flags.
+ *
+ * OVS_TNL_F_KEY is reported when the header has GRE_KEY set or when the
+ * packet was classified as GRE64 ('is_gre64'); OVS_TNL_F_CSUM is reported
+ * when the header has GRE_CSUM set.
+ */
+static u32 gre_flags_to_tunnel_flags(__be16 gre_flags, bool is_gre64)
+{
+       u32 key_flag = 0;
+       u32 csum_flag = 0;
+
+       if (is_gre64 || (gre_flags & GRE_KEY))
+               key_flag = OVS_TNL_F_KEY;
+       if (gre_flags & GRE_CSUM)
+               csum_flag = OVS_TNL_F_CSUM;
+
+       return key_flag | csum_flag;
+}
+
 /* Called with rcu_read_lock and BH disabled. */
 static int gre_rcv(struct sk_buff *skb)
 {
+       struct ovs_net *ovs_net;
        struct vport *vport;
-       const struct tnl_mutable_config *mutable;
        int hdr_len;
        struct iphdr *iph;
-       __be16 flags;
+       struct ovs_key_ipv4_tunnel tun_key;
+       __be16 gre_flags;
+       u32 tnl_flags;
        __be64 key;
-       u32 tunnel_type;
+       bool is_gre64;
 
        if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)))
                goto error;
        if (unlikely(!check_checksum(skb)))
                goto error;
 
-       hdr_len = parse_header(ip_hdr(skb), &flags, &key, &tunnel_type);
+       hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &is_gre64);
        if (unlikely(hdr_len < 0))
                goto error;
 
-       if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
+       ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+       if (is_gre64)
+               vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
+       else
+               vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+       if (unlikely(!vport))
                goto error;
 
-       iph = ip_hdr(skb);
-       vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, key,
-                                 tunnel_type, &mutable);
-       if (unlikely(!vport)) {
-               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+       if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
                goto error;
-       }
 
-       if (mutable->flags & TNL_F_IN_KEY_MATCH)
-               OVS_CB(skb)->tun_id = key;
-       else
-               OVS_CB(skb)->tun_id = 0;
+       iph = ip_hdr(skb);
+       tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64);
+       tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
+       OVS_CB(skb)->tun_key = &tun_key;
 
        __skb_pull(skb, hdr_len);
        skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
 
-       ovs_tnl_rcv(vport, skb, iph->tos);
+       ovs_tnl_rcv(vport, skb);
        return 0;
 
 error:
@@ -411,35 +283,8 @@ error:
        return 0;
 }
 
-static const struct tnl_ops gre_tnl_ops = {
-       .tunnel_type    = TNL_T_PROTO_GRE,
-       .ipproto        = IPPROTO_GRE,
-       .hdr_len        = gre_hdr_len,
-       .build_header   = gre_build_header,
-       .update_header  = gre_update_header,
-};
-
-static struct vport *gre_create(const struct vport_parms *parms)
-{
-       return ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
-}
-
-static const struct tnl_ops gre64_tnl_ops = {
-       .tunnel_type    = TNL_T_PROTO_GRE64,
-       .ipproto        = IPPROTO_GRE,
-       .hdr_len        = gre_hdr_len,
-       .build_header   = gre_build_header,
-       .update_header  = gre_update_header,
-};
-
-static struct vport *gre_create64(const struct vport_parms *parms)
-{
-       return ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
-}
-
 static const struct net_protocol gre_protocol_handlers = {
        .handler        =       gre_rcv,
-       .err_handler    =       gre_err,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
        .netns_ok       =       1,
 #endif
@@ -472,38 +317,93 @@ static void gre_exit(void)
        inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
 }
 
+/*
+ * GRE vport.
+ *
+ * Tunnel operations for plain GRE ports: the IP protocol is IPPROTO_GRE,
+ * the header length comes from gre_hdr_len() and the header itself is
+ * written by gre_build_header().
+ */
+static const struct tnl_ops gre_tnl_ops = {
+       .ipproto        = IPPROTO_GRE,
+       .hdr_len        = gre_hdr_len,
+       .build_header   = gre_build_header,
+};
+
+/*
+ * Creates the GRE vport for the datapath's struct net.
+ *
+ * Only one GRE vport may be registered in the ovs_net at a time: if
+ * vport_net.gre_vport is already set, ERR_PTR(-EEXIST) is returned.
+ * Otherwise the port is created with ovs_tnl_create() and, on success,
+ * published in vport_net.gre_vport, which gre_rcv() reads for non-GRE64
+ * packets.
+ *
+ * Returns the new vport, or an ERR_PTR() value on failure.
+ */
+static struct vport *gre_create(const struct vport_parms *parms)
+{
+       struct net *net = ovs_dp_get_net(parms->dp);
+       struct ovs_net *ovs_net;
+       struct vport *vport;
+
+       ovs_net = net_generic(net, ovs_net_id);
+       if (rtnl_dereference(ovs_net->vport_net.gre_vport))
+               return ERR_PTR(-EEXIST);
+
+       vport = ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
+       /*
+        * Never publish a failed creation.  An error pointer is non-NULL, so
+        * gre_rcv() would treat it as a live vport and every later create
+        * would fail with -EEXIST.
+        * NOTE(review): assumes ovs_tnl_create() reports failure via
+        * ERR_PTR(), as this function does -- confirm against tunnel.c.
+        */
+       if (IS_ERR(vport))
+               return vport;
+
+       rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
+       return vport;
+}
+
+/*
+ * Destroys a GRE vport.  The vport_net.gre_vport pointer in the ovs_net
+ * of the datapath's struct net is cleared first, then the vport is handed
+ * to ovs_tnl_destroy().
+ */
+static void gre_tnl_destroy(struct vport *vport)
+{
+       struct ovs_net *ovs_net = net_generic(ovs_dp_get_net(vport->dp),
+                                             ovs_net_id);
+
+       rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
+       ovs_tnl_destroy(vport);
+}
+
 const struct vport_ops ovs_gre_vport_ops = {
        .type           = OVS_VPORT_TYPE_GRE,
        .flags          = VPORT_F_TUN_ID,
        .init           = gre_init,
        .exit           = gre_exit,
        .create         = gre_create,
-       .destroy        = ovs_tnl_destroy,
-       .set_addr       = ovs_tnl_set_addr,
+       .destroy        = gre_tnl_destroy,
        .get_name       = ovs_tnl_get_name,
-       .get_addr       = ovs_tnl_get_addr,
-       .get_options    = ovs_tnl_get_options,
-       .set_options    = ovs_tnl_set_options,
-       .get_dev_flags  = ovs_vport_gen_get_dev_flags,
-       .is_running     = ovs_vport_gen_is_running,
-       .get_operstate  = ovs_vport_gen_get_operstate,
        .send           = ovs_tnl_send,
 };
 
+/*
+ * GRE64 vport.
+ *
+ * Tunnel operations for GRE64 ports: the IP protocol is IPPROTO_GRE, the
+ * header length comes from gre64_hdr_len() and the header itself is
+ * written by gre64_build_header().
+ */
+static const struct tnl_ops gre64_tnl_ops = {
+       .ipproto        = IPPROTO_GRE,
+       .hdr_len        = gre64_hdr_len,
+       .build_header   = gre64_build_header,
+};
+
+/*
+ * Creates the GRE64 vport for the datapath's struct net.
+ *
+ * Only one GRE64 vport may be registered in the ovs_net at a time: if
+ * vport_net.gre64_vport is already set, ERR_PTR(-EEXIST) is returned.
+ * Otherwise the port is created with ovs_tnl_create() and, on success,
+ * published in vport_net.gre64_vport, which gre_rcv() reads for GRE64
+ * packets.
+ *
+ * Returns the new vport, or an ERR_PTR() value on failure.
+ */
+static struct vport *gre64_create(const struct vport_parms *parms)
+{
+       struct net *net = ovs_dp_get_net(parms->dp);
+       struct ovs_net *ovs_net;
+       struct vport *vport;
+
+       ovs_net = net_generic(net, ovs_net_id);
+       if (rtnl_dereference(ovs_net->vport_net.gre64_vport))
+               return ERR_PTR(-EEXIST);
+
+       vport = ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
+       /*
+        * Never publish a failed creation.  An error pointer is non-NULL, so
+        * gre_rcv() would treat it as a live vport and every later create
+        * would fail with -EEXIST.
+        * NOTE(review): assumes ovs_tnl_create() reports failure via
+        * ERR_PTR(), as this function does -- confirm against tunnel.c.
+        */
+       if (IS_ERR(vport))
+               return vport;
+
+       rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport);
+       return vport;
+}
+
+
+/*
+ * Destroys a GRE64 vport.  The vport_net.gre64_vport pointer in the
+ * ovs_net of the datapath's struct net is cleared first, then the vport
+ * is handed to ovs_tnl_destroy().
+ */
+static void gre64_tnl_destroy(struct vport *vport)
+{
+       struct ovs_net *ovs_net = net_generic(ovs_dp_get_net(vport->dp),
+                                             ovs_net_id);
+
+       rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL);
+       ovs_tnl_destroy(vport);
+}
+
 const struct vport_ops ovs_gre64_vport_ops = {
        .type           = OVS_VPORT_TYPE_GRE64,
        .flags          = VPORT_F_TUN_ID,
        .init           = gre_init,
        .exit           = gre_exit,
-       .create         = gre_create64,
-       .destroy        = ovs_tnl_destroy,
-       .set_addr       = ovs_tnl_set_addr,
+       .create         = gre64_create,
+       .destroy        = gre64_tnl_destroy,
        .get_name       = ovs_tnl_get_name,
-       .get_addr       = ovs_tnl_get_addr,
-       .get_options    = ovs_tnl_get_options,
-       .set_options    = ovs_tnl_set_options,
-       .get_dev_flags  = ovs_vport_gen_get_dev_flags,
-       .is_running     = ovs_vport_gen_is_running,
-       .get_operstate  = ovs_vport_gen_get_operstate,
        .send           = ovs_tnl_send,
 };