* 02110-1301, USA
*/
+#include <linux/kconfig.h>
+#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if.h>
#include <linux/if_tunnel.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/route.h>
+#include <net/xfrm.h>
#include <net/icmp.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/gre.h>
#include <net/protocol.h>
#include "datapath.h"
-#include "tunnel.h"
#include "vport.h"
-/*
- * The GRE header is composed of a series of sections: a base and then a variable
- * number of options.
- */
-#define GRE_HEADER_SECTION 4
-
-struct gre_base_hdr {
- __be16 flags;
- __be16 protocol;
-};
-
-static int gre_hdr_len(const struct tnl_mutable_config *mutable,
- const struct ovs_key_ipv4_tunnel *tun_key)
-{
- int len;
- u32 flags;
- __be64 out_key;
-
- tnl_get_param(mutable, tun_key, &flags, &out_key);
- len = GRE_HEADER_SECTION;
-
- if (flags & TNL_F_CSUM)
- len += GRE_HEADER_SECTION;
-
- /* Set key for GRE64 tunnels, even when key if is zero. */
- if (out_key ||
- mutable->key.tunnel_type & TNL_T_PROTO_GRE64 ||
- flags & TNL_F_OUT_KEY_ACTION) {
-
- len += GRE_HEADER_SECTION;
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
- len += GRE_HEADER_SECTION;
- }
- return len;
-}
-
-
/* Returns the least-significant 32 bits of a __be64. */
static __be32 be64_get_low32(__be64 x)
{
#endif
}
-static __be32 be64_get_high32(__be64 x)
+/* Masks a tunnel flag word down to its TUNNEL_CSUM and TUNNEL_KEY bits. */
+static __be16 filter_tnl_flags(__be16 flags)
{
-#ifdef __BIG_ENDIAN
- return (__force __be32)((__force u64)x >> 32);
-#else
- return (__force __be32)x;
-#endif
+ return flags & (TUNNEL_CSUM | TUNNEL_KEY);
}
-static struct sk_buff *gre_build_header(const struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct dst_entry *dst,
- struct sk_buff *skb,
- int tunnel_hlen)
+/*
+ * Fills a tnl_ptk_info from the skb's OVS tunnel key and lets
+ * gre_build_header() push the GRE header of tunnel_hlen bytes.
+ *
+ * seq is stored in the header info as-is; gre64_flag is OR-ed into the
+ * filtered tunnel flags (gre64_send() passes TUNNEL_KEY|TUNNEL_SEQ, gre_send()
+ * passes 0).
+ *
+ * Returns the skb handed back by gre_handle_offloads(), or NULL when that
+ * call returned an error pointer.
+ */
+static struct sk_buff *__build_header(struct sk_buff *skb,
+ int tunnel_hlen,
+ __be32 seq, __be16 gre64_flag)
{
- u32 flags;
- __be64 out_key;
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
- __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
- - GRE_HEADER_SECTION);
- struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
-
- tnl_get_param(mutable, tun_key, &flags, &out_key);
-
- greh->protocol = htons(ETH_P_TEB);
- greh->flags = 0;
-
- /* Work backwards over the options so the checksum is last. */
- if (out_key || flags & TNL_F_OUT_KEY_ACTION ||
- mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
- greh->flags |= GRE_KEY;
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
- /* Set higher 32 bits to seq. */
- *options = be64_get_high32(out_key);
- options--;
- greh->flags |= GRE_SEQ;
- }
- *options = be64_get_low32(out_key);
- options--;
- }
+ struct tnl_ptk_info tpi;
- if (flags & TNL_F_CSUM) {
- greh->flags |= GRE_CSUM;
- *options = 0;
- *(__sum16 *)options = csum_fold(skb_checksum(skb,
- skb_transport_offset(skb),
- skb->len - skb_transport_offset(skb),
- 0));
- }
- /*
- * Allow our local IP stack to fragment the outer packet even if the
- * DF bit is set as a last resort. We also need to force selection of
- * an IP ID here because Linux will otherwise leave it at 0 if the
- * packet originally had DF set.
- */
- skb->local_df = 1;
- __ip_select_ident(ip_hdr(skb), dst, 0);
+ /* The second argument tells the helper whether TUNNEL_CSUM is set. */
+ skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
+ if (IS_ERR(skb))
+ return NULL;
+
+ tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
+
+ tpi.proto = htons(ETH_P_TEB);
+ /* Only the low 32 bits of tun_id go into the GRE key field. */
+ tpi.key = be64_get_low32(tun_key->tun_id);
+ tpi.seq = seq;
+ gre_build_header(skb, &tpi, tunnel_hlen);
return skb;
}
#endif
}
-static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
- u32 *tunnel_type)
+/*
+ * Receive handler installed through gre_protocol.  Selects the namespace's
+ * gre64 vport when both TUNNEL_KEY and TUNNEL_SEQ are set in tpi->flags and
+ * the plain gre vport otherwise, builds a tunnel key from the skb's IP
+ * header and hands the packet to ovs_vport_receive().
+ *
+ * Returns PACKET_REJECT when the selected vport pointer is NULL, PACKET_RCVD
+ * otherwise.
+ *
+ * Called with rcu_read_lock and BH disabled.
+ */
+static int gre_rcv(struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi)
{
- /* IP and ICMP protocol handlers check that the IHL is valid. */
- struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
- __be32 *options = (__be32 *)(greh + 1);
- int hdr_len;
+ struct ovs_key_ipv4_tunnel tun_key;
+ struct ovs_net *ovs_net;
+ struct vport *vport;
+ __be64 key;
- *flags = greh->flags;
+ ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ if ((tpi->flags & TUNNEL_KEY) && (tpi->flags & TUNNEL_SEQ))
+ vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
+ else
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+ if (unlikely(!vport))
+ return PACKET_REJECT;
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
+ /* Fold the GRE key and sequence fields into a single 64-bit tunnel id. */
+ key = key_to_tunnel_id(tpi->key, tpi->seq);
+ ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
- if (unlikely(greh->protocol != htons(ETH_P_TEB)))
- return -EINVAL;
+ ovs_vport_receive(vport, skb, &tun_key);
+ return PACKET_RCVD;
+}
+
+/*
+ * Common transmit path for the gre and gre64 vports: looks up a route from
+ * the skb's tunnel key, makes sure the skb has enough private headroom,
+ * pushes the GRE header and passes the packet to iptunnel_xmit().
+ *
+ * tunnel_hlen, seq and gre64_flag are forwarded to __build_header().
+ *
+ * Returns whatever iptunnel_xmit() returns on the normal path, the error from
+ * the route lookup or pskb_expand_head(), -ENOMEM if vlan_deaccel_tag()
+ * fails, and 0 if __build_header() returns NULL.
+ */
+static int __send(struct vport *vport, struct sk_buff *skb,
+ int tunnel_hlen,
+ __be32 seq, __be16 gre64_flag)
+{
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct rtable *rt;
+ int min_headroom;
+ __be16 df;
+ __be32 saddr;
+ int err;
+
+ forward_ip_summed(skb, true);
+
+ /* Route lookup */
+ saddr = OVS_CB(skb)->tun_key->ipv4_src;
+ rt = find_route(ovs_dp_get_net(vport->dp),
+ &saddr,
+ OVS_CB(skb)->tun_key->ipv4_dst,
+ IPPROTO_GRE,
+ OVS_CB(skb)->tun_key->ipv4_tos,
+ skb_get_mark(skb));
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto error;
+ }
- hdr_len = GRE_HEADER_SECTION;
+ /*
+ * Headroom needed: link layer + dst header_len + GRE header + outer IP
+ * header, plus a VLAN header when a tag is present on the skb.
+ */
+ min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ + tunnel_hlen + sizeof(struct iphdr)
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+ /* Also reallocate when the header is cloned, even if headroom suffices. */
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto err_free_rt;
+ }
- if (greh->flags & GRE_CSUM) {
- hdr_len += GRE_HEADER_SECTION;
- options++;
+ if (unlikely(vlan_deaccel_tag(skb))) {
+ err = -ENOMEM;
+ goto err_free_rt;
}
- if (greh->flags & GRE_KEY) {
- __be32 seq;
- __be32 gre_key;
-
- gre_key = *options;
- hdr_len += GRE_HEADER_SECTION;
- options++;
-
- if (greh->flags & GRE_SEQ) {
- seq = *options;
- *tunnel_type = TNL_T_PROTO_GRE64;
- } else {
- seq = 0;
- *tunnel_type = TNL_T_PROTO_GRE;
- }
- *tun_id = key_to_tunnel_id(gre_key, seq);
- } else {
- *tun_id = 0;
- /* Ignore GRE seq if there is no key present. */
- *tunnel_type = TNL_T_PROTO_GRE;
+ /* Push Tunnel header. */
+ skb = __build_header(skb, tunnel_hlen, seq, gre64_flag);
+ if (unlikely(!skb)) {
+ /*
+ * NOTE(review): 0 rather than an errno is returned here; presumably
+ * the skb was already consumed by gre_handle_offloads() - confirm.
+ */
+ err = 0;
+ goto err_free_rt;
}
- if (greh->flags & GRE_SEQ)
- hdr_len += GRE_HEADER_SECTION;
+ df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+ htons(IP_DF) : 0;
+
+ skb->local_df = 1;
- return hdr_len;
+ return iptunnel_xmit(net, rt, skb, saddr,
+ OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
+ OVS_CB(skb)->tun_key->ipv4_tos,
+ OVS_CB(skb)->tun_key->ipv4_ttl, df);
+err_free_rt:
+ ip_rt_put(rt);
+error:
+ return err;
}
-static bool check_checksum(struct sk_buff *skb)
+/*
+ * Protocol descriptor passed to gre_cisco_register()/gre_cisco_unregister();
+ * its handler is gre_rcv().
+ */
+static struct gre_cisco_protocol gre_protocol = {
+ .handler = gre_rcv,
+ .priority = 1,
+};
+
+/* Count of gre_init() calls not yet balanced by gre_exit(). */
+static int gre_ports;
+
+/*
+ * Takes one reference on the shared GRE protocol registration.  Only the
+ * first reference actually calls gre_cisco_register().
+ *
+ * Returns 0 on success or the error from gre_cisco_register().  On failure
+ * the reference is dropped again, so the caller must not call gre_exit() and
+ * a later gre_init() will retry the registration.
+ */
+static int gre_init(void)
{
- struct iphdr *iph = ip_hdr(skb);
- struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
- __sum16 csum = 0;
-
- if (greh->flags & GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
-
- if (!csum)
- break;
- /* Fall through. */
-
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- break;
- }
- }
+ int err;
+
+ gre_ports++;
+ if (gre_ports > 1)
+ return 0;
+
+ err = gre_cisco_register(&gre_protocol);
+ if (err) {
+ pr_warn("cannot register gre protocol handler\n");
+ /*
+ * Undo the increment: callers bail out without gre_exit() on
+ * error, and a stale count would make every later call skip
+ * registration and report success.
+ */
+ gre_ports--;
+ }
- return (csum == 0);
+ return err;
}
-static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
- __be16 gre_flags, __be64 *key)
+/*
+ * Drops one reference taken by gre_init(); unregisters gre_protocol unless
+ * gre_ports is still positive afterwards.
+ */
+static void gre_exit(void)
{
- u32 tunnel_flags = 0;
-
- if (gre_flags & GRE_KEY) {
- if (mutable->flags & TNL_F_IN_KEY_MATCH ||
- !mutable->key.daddr)
- tunnel_flags = OVS_TNL_F_KEY;
- else
- *key = 0;
- }
+ gre_ports--;
+ if (gre_ports > 0)
+ return;
- if (gre_flags & GRE_CSUM)
- tunnel_flags |= OVS_TNL_F_CSUM;
+ gre_cisco_unregister(&gre_protocol);
+}
- return tunnel_flags;
+/*
+ * Returns the vport's private area, which gre_create()/gre64_create() fill
+ * with the port name.
+ */
+static const char *gre_get_name(const struct vport *vport)
+{
+ return vport_priv(vport);
}
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb)
+/*
+ * Creates the plain GRE vport for the datapath's network namespace.
+ *
+ * Takes a gre_init() reference, refuses with -EEXIST if the namespace already
+ * has a gre_vport, allocates the vport with IFNAMSIZ bytes of private data,
+ * stores the port name there and publishes the vport in the namespace.
+ *
+ * Returns the new vport, or an ERR_PTR: the gre_init() error, -EEXIST, or
+ * the error from ovs_vport_alloc().  The gre_init() reference is released on
+ * the latter two failures.
+ */
+static struct vport *gre_create(const struct vport_parms *parms)
{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct ovs_net *ovs_net;
struct vport *vport;
- const struct tnl_mutable_config *mutable;
- int hdr_len;
- struct iphdr *iph;
- struct ovs_key_ipv4_tunnel tun_key;
- __be16 gre_flags;
- u32 tnl_flags;
- __be64 key;
- u32 tunnel_type;
+ int err;
- if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)))
- goto error;
- if (unlikely(!check_checksum(skb)))
- goto error;
+ err = gre_init();
+ if (err)
+ return ERR_PTR(err);
- hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &tunnel_type);
- if (unlikely(hdr_len < 0))
+ ovs_net = net_generic(net, ovs_net_id);
+ if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
+ vport = ERR_PTR(-EEXIST);
goto error;
+ }
- if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+ if (IS_ERR(vport))
goto error;
- iph = ip_hdr(skb);
- vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, key,
- tunnel_type, &mutable);
- if (unlikely(!vport)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- goto error;
- }
+ /*
+ * strlcpy() always NUL-terminates within IFNAMSIZ bytes; strncpy() would
+ * leave the name unterminated if it filled the buffer, and
+ * gre_get_name() returns this buffer as a C string.
+ */
+ strlcpy(vport_priv(vport), parms->name, IFNAMSIZ);
+ rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
+ return vport;
- tnl_flags = gre_flags_to_tunnel_flags(mutable, gre_flags, &key);
- tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
- OVS_CB(skb)->tun_key = &tun_key;
+error:
+ gre_exit();
+ return vport;
+}
- __skb_pull(skb, hdr_len);
- skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
+/*
+ * Tears down the plain GRE vport: clears the namespace's gre_vport pointer,
+ * passes the vport to ovs_vport_deferred_free() and calls gre_exit() to
+ * balance the gre_init() done in gre_create().
+ */
+static void gre_tnl_destroy(struct vport *vport)
+{
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct ovs_net *ovs_net;
- ovs_tnl_rcv(vport, skb);
- return 0;
+ ovs_net = net_generic(net, ovs_net_id);
-error:
- kfree_skb(skb);
- return 0;
+ rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
}
-static const struct tnl_ops gre_tnl_ops = {
- .tunnel_type = TNL_T_PROTO_GRE,
- .ipproto = IPPROTO_GRE,
- .hdr_len = gre_hdr_len,
- .build_header = gre_build_header,
-};
-
-static struct vport *gre_create(const struct vport_parms *parms)
+/*
+ * Transmit hook of the plain GRE vport.  Returns -EINVAL when the skb carries
+ * no tunnel key; otherwise sizes the GRE header with ip_gre_calc_hlen() from
+ * the key's tun_flags and sends through __send() with seq and gre64_flag
+ * both 0.
+ */
+static int gre_send(struct vport *vport, struct sk_buff *skb)
{
- return ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
-}
+ int hlen;
-static const struct tnl_ops gre64_tnl_ops = {
- .tunnel_type = TNL_T_PROTO_GRE64,
- .ipproto = IPPROTO_GRE,
- .hdr_len = gre_hdr_len,
- .build_header = gre_build_header,
-};
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
-static struct vport *gre_create64(const struct vport_parms *parms)
-{
- return ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
+ hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
+
+ return __send(vport, skb, hlen, 0, 0);
}
-static const struct net_protocol gre_protocol_handlers = {
- .handler = gre_rcv,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
- .netns_ok = 1,
-#endif
+/* Operations table for vports of type OVS_VPORT_TYPE_GRE. */
+const struct vport_ops ovs_gre_vport_ops = {
+ .type = OVS_VPORT_TYPE_GRE,
+ .create = gre_create,
+ .destroy = gre_tnl_destroy,
+ .get_name = gre_get_name,
+ .send = gre_send,
};
-static bool inited;
-
-static int gre_init(void)
+/* GRE64 vport. */
+
+/*
+ * Creates the GRE64 vport for the datapath's network namespace.
+ *
+ * Takes a gre_init() reference, refuses with -EEXIST if the namespace already
+ * has a gre64_vport, allocates the vport with IFNAMSIZ bytes of private data,
+ * stores the port name there and publishes the vport in the namespace.
+ *
+ * Returns the new vport, or an ERR_PTR: the gre_init() error, -EEXIST, or
+ * the error from ovs_vport_alloc().  The gre_init() reference is released on
+ * the latter two failures.
+ */
+static struct vport *gre64_create(const struct vport_parms *parms)
{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct ovs_net *ovs_net;
+ struct vport *vport;
int err;
- if (inited)
- return 0;
-
- inited = true;
- err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+ err = gre_init();
if (err)
- pr_warn("cannot register gre protocol handler\n");
+ return ERR_PTR(err);
- return err;
+ ovs_net = net_generic(net, ovs_net_id);
+ if (ovsl_dereference(ovs_net->vport_net.gre64_vport)) {
+ vport = ERR_PTR(-EEXIST);
+ goto error;
+ }
+
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre64_vport_ops, parms);
+ if (IS_ERR(vport))
+ goto error;
+
+ /*
+ * strlcpy() always NUL-terminates within IFNAMSIZ bytes; strncpy() would
+ * leave the name unterminated if it filled the buffer, and
+ * gre_get_name() returns this buffer as a C string.
+ */
+ strlcpy(vport_priv(vport), parms->name, IFNAMSIZ);
+ rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport);
+ return vport;
+error:
+ gre_exit();
+ return vport;
}
-static void gre_exit(void)
+/*
+ * Tears down the GRE64 vport: clears the namespace's gre64_vport pointer,
+ * passes the vport to ovs_vport_deferred_free() and calls gre_exit() to
+ * balance the gre_init() done in gre64_create().
+ */
+static void gre64_tnl_destroy(struct vport *vport)
{
- if (!inited)
- return;
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct ovs_net *ovs_net;
- inited = false;
+ ovs_net = net_generic(net, ovs_net_id);
- inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+ rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
}
-const struct vport_ops ovs_gre_vport_ops = {
- .type = OVS_VPORT_TYPE_GRE,
- .flags = VPORT_F_TUN_ID,
- .init = gre_init,
- .exit = gre_exit,
- .create = gre_create,
- .destroy = ovs_tnl_destroy,
- .get_name = ovs_tnl_get_name,
- .get_options = ovs_tnl_get_options,
- .set_options = ovs_tnl_set_options,
- .send = ovs_tnl_send,
-};
+/*
+ * Returns the most-significant 32 bits of a __be64.  On little-endian hosts
+ * those bytes sit in the low half of the native 64-bit value, so a plain
+ * truncating cast is used instead of a shift.
+ */
+static __be32 be64_get_high32(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+ return (__force __be32)((__force u64)x >> 32);
+#else
+ return (__force __be32)x;
+#endif
+}
+
+/*
+ * Transmit hook of the GRE64 vport.  The header length always covers the base
+ * GRE header, the key and the sequence field, plus one more section when
+ * TUNNEL_CSUM is set in the tunnel key's flags.  The high 32 bits of tun_id
+ * are passed to __send() as the sequence value (the low 32 bits become the
+ * GRE key in __build_header()), with TUNNEL_KEY|TUNNEL_SEQ forced on.
+ *
+ * Returns -EINVAL when the skb carries no tunnel key, otherwise the result of
+ * __send().
+ */
+static int gre64_send(struct vport *vport, struct sk_buff *skb)
+{
+ int hlen = GRE_HEADER_SECTION + /* GRE Hdr */
+ GRE_HEADER_SECTION + /* GRE Key */
+ GRE_HEADER_SECTION; /* GRE SEQ */
+ __be32 seq;
+
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
+
+ if (OVS_CB(skb)->tun_key->tun_flags & TUNNEL_CSUM)
+ hlen += GRE_HEADER_SECTION;
+
+ seq = be64_get_high32(OVS_CB(skb)->tun_key->tun_id);
+ return __send(vport, skb, hlen, seq, (TUNNEL_KEY|TUNNEL_SEQ));
+}
+/*
+ * Operations table for vports of type OVS_VPORT_TYPE_GRE64; shares
+ * gre_get_name() with the plain GRE vport.
+ */
const struct vport_ops ovs_gre64_vport_ops = {
.type = OVS_VPORT_TYPE_GRE64,
- .flags = VPORT_F_TUN_ID,
- .init = gre_init,
- .exit = gre_exit,
- .create = gre_create64,
- .destroy = ovs_tnl_destroy,
- .get_name = ovs_tnl_get_name,
- .get_options = ovs_tnl_get_options,
- .set_options = ovs_tnl_set_options,
- .send = ovs_tnl_send,
+ .create = gre64_create,
+ .destroy = gre64_tnl_destroy,
+ .get_name = gre_get_name,
+ .send = gre64_send,
};
+#endif