X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-gre.c;h=a29d2e8fd65bc562da4b39c8b2b915dd0decfc57;hb=4816a18f33380a33d381b77d41df39113c94500d;hp=ab89c5b42df4d72ca812c9b5029fa6e2794707ae;hpb=1e3f34c7693bcabae8e443ac1b246680ef9b60e2;p=sliver-openvswitch.git diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index ab89c5b42..a29d2e8fd 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -32,7 +32,6 @@ #include "datapath.h" #include "tunnel.h" #include "vport.h" -#include "vport-generic.h" /* * The GRE header is composed of a series of sections: a base and then a variable @@ -45,16 +44,25 @@ struct gre_base_hdr { __be16 protocol; }; -static int gre_hdr_len(const struct tnl_mutable_config *mutable) +static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key) { - int len; + int len = GRE_HEADER_SECTION; - len = GRE_HEADER_SECTION; - - if (mutable->flags & TNL_F_CSUM) + if (tun_key->tun_flags & OVS_TNL_F_KEY) + len += GRE_HEADER_SECTION; + if (tun_key->tun_flags & OVS_TNL_F_CSUM) len += GRE_HEADER_SECTION; + return len; +} + +static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key) +{ + /* Set key for GRE64 tunnels, even when key is zero. 
*/ + int len = GRE_HEADER_SECTION + /* GRE Hdr */ + GRE_HEADER_SECTION + /* GRE Key */ + GRE_HEADER_SECTION; /* GRE SEQ */ - if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) + if (tun_key->tun_flags & OVS_TNL_F_CSUM) len += GRE_HEADER_SECTION; return len; @@ -70,72 +78,74 @@ static __be32 be64_get_low32(__be64 x) #endif } -static void gre_build_header(const struct vport *vport, - const struct tnl_mutable_config *mutable, - void *header) +static __be32 be64_get_high32(__be64 x) { - struct gre_base_hdr *greh = header; - __be32 *options = (__be32 *)(greh + 1); - - greh->protocol = htons(ETH_P_TEB); - greh->flags = 0; - - if (mutable->flags & TNL_F_CSUM) { - greh->flags |= GRE_CSUM; - *options = 0; - options++; - } - - if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) - greh->flags |= GRE_KEY; - - if (mutable->out_key) - *options = be64_get_low32(mutable->out_key); +#ifdef __BIG_ENDIAN + return (__force __be32)((__force u64)x >> 32); +#else + return (__force __be32)x; +#endif } -static struct sk_buff *gre_update_header(const struct vport *vport, - const struct tnl_mutable_config *mutable, - struct dst_entry *dst, - struct sk_buff *skb) +static void __gre_build_header(struct sk_buff *skb, + int tunnel_hlen, + bool is_gre64) { - __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen - - GRE_HEADER_SECTION); + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen + - GRE_HEADER_SECTION); + struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb); + greh->protocol = htons(ETH_P_TEB); + greh->flags = 0; /* Work backwards over the options so the checksum is last. 
*/ - if (mutable->flags & TNL_F_OUT_KEY_ACTION) - *options = be64_get_low32(OVS_CB(skb)->tun_id); - - if (mutable->out_key || mutable->flags & TNL_F_OUT_KEY_ACTION) + if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) { + greh->flags |= GRE_KEY; + if (is_gre64) { + /* Set higher 32 bits to seq. */ + *options = be64_get_high32(tun_key->tun_id); + options--; + greh->flags |= GRE_SEQ; + } + *options = be64_get_low32(tun_key->tun_id); options--; + } - if (mutable->flags & TNL_F_CSUM) + if (tun_key->tun_flags & OVS_TNL_F_CSUM) { + greh->flags |= GRE_CSUM; + *options = 0; *(__sum16 *)options = csum_fold(skb_checksum(skb, skb_transport_offset(skb), skb->len - skb_transport_offset(skb), 0)); - /* - * Allow our local IP stack to fragment the outer packet even if the - * DF bit is set as a last resort. We also need to force selection of - * an IP ID here because Linux will otherwise leave it at 0 if the - * packet originally had DF set. - */ - skb->local_df = 1; - __ip_select_ident(ip_hdr(skb), dst, 0); - - return skb; + } +} + +static void gre_build_header(const struct vport *vport, + struct sk_buff *skb, + int tunnel_hlen) +{ + __gre_build_header(skb, tunnel_hlen, false); +} + +static void gre64_build_header(const struct vport *vport, + struct sk_buff *skb, + int tunnel_hlen) +{ + __gre_build_header(skb, tunnel_hlen, true); } -/* Zero-extends a __be32 into the least-significant 32 bits of a __be64. 
*/ -static __be64 be32_extend_to_be64(__be32 x) +static __be64 key_to_tunnel_id(__be32 key, __be32 seq) { #ifdef __BIG_ENDIAN - return (__force __be64)x; + return (__force __be64)((__force u64)seq << 32 | (__force u32)key); #else - return (__force __be64)((__force u64)x << 32); + return (__force __be64)((__force u64)key << 32 | (__force u32)seq); #endif } -static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *key) +static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id, + bool *is_gre64) { /* IP and ICMP protocol handlers check that the IHL is valid. */ struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); @@ -158,140 +168,31 @@ static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *key) } if (greh->flags & GRE_KEY) { - hdr_len += GRE_HEADER_SECTION; - - *key = be32_extend_to_be64(*options); - options++; - } else - *key = 0; + __be32 seq; + __be32 gre_key; - if (unlikely(greh->flags & GRE_SEQ)) + gre_key = *options; hdr_len += GRE_HEADER_SECTION; + options++; - return hdr_len; -} - -/* Called with rcu_read_lock and BH disabled. */ -static void gre_err(struct sk_buff *skb, u32 info) -{ - struct vport *vport; - const struct tnl_mutable_config *mutable; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu); - - struct iphdr *iph; - __be16 flags; - __be64 key; - int tunnel_hdr_len, tot_hdr_len; - unsigned int orig_mac_header; - unsigned int orig_nw_header; - - if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED) - return; - - /* - * The mimimum size packet that we would actually be able to process: - * encapsulating IP header, minimum GRE header, Ethernet header, - * inner IPv4 header. 
- */ - if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION + - ETH_HLEN + sizeof(struct iphdr))) - return; - - iph = (struct iphdr *)skb->data; - if (ipv4_is_multicast(iph->daddr)) - return; - - tunnel_hdr_len = parse_header(iph, &flags, &key); - if (tunnel_hdr_len < 0) - return; - - vport = ovs_tnl_find_port(dev_net(skb->dev), iph->saddr, iph->daddr, key, - TNL_T_PROTO_GRE, &mutable); - if (!vport) - return; - - /* - * Packets received by this function were previously sent by us, so - * any comparisons should be to the output values, not the input. - * However, it's not really worth it to have a hash table based on - * output keys (especially since ICMP error handling of tunneled packets - * isn't that reliable anyways). Therefore, we do a lookup based on the - * out key as if it were the in key and then check to see if the input - * and output keys are the same. - */ - if (mutable->key.in_key != mutable->out_key) - return; - - if (!!(mutable->flags & TNL_F_IN_KEY_MATCH) != - !!(mutable->flags & TNL_F_OUT_KEY_ACTION)) - return; - - if ((mutable->flags & TNL_F_CSUM) && !(flags & GRE_CSUM)) - return; - - tunnel_hdr_len += iph->ihl << 2; - - orig_mac_header = skb_mac_header(skb) - skb->data; - orig_nw_header = skb_network_header(skb) - skb->data; - skb_set_mac_header(skb, tunnel_hdr_len); - - tot_hdr_len = tunnel_hdr_len + ETH_HLEN; - - skb->protocol = eth_hdr(skb)->h_proto; - if (skb->protocol == htons(ETH_P_8021Q)) { - tot_hdr_len += VLAN_HLEN; - skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; - } - - skb_set_network_header(skb, tot_hdr_len); - mtu -= tot_hdr_len; - - if (skb->protocol == htons(ETH_P_IP)) - tot_hdr_len += sizeof(struct iphdr); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) - tot_hdr_len += sizeof(struct ipv6hdr); -#endif - else - goto out; - - if (!pskb_may_pull(skb, tot_hdr_len)) - goto out; - - if (skb->protocol == htons(ETH_P_IP)) { - if (mtu < IP_MIN_MTU) { - 
if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU) - mtu = IP_MIN_MTU; - else - goto out; + if (greh->flags & GRE_SEQ) { + seq = *options; + *is_gre64 = true; + } else { + seq = 0; + *is_gre64 = false; } - + *tun_id = key_to_tunnel_id(gre_key, seq); + } else { + *tun_id = 0; + /* Ignore GRE seq if there is no key present. */ + *is_gre64 = false; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) { - if (mtu < IPV6_MIN_MTU) { - unsigned int packet_length = sizeof(struct ipv6hdr) + - ntohs(ipv6_hdr(skb)->payload_len); - - if (packet_length >= IPV6_MIN_MTU - || ntohs(ipv6_hdr(skb)->payload_len) == 0) - mtu = IPV6_MIN_MTU; - else - goto out; - } - } -#endif - __skb_pull(skb, tunnel_hdr_len); - ovs_tnl_frag_needed(vport, mutable, skb, mtu, key); - __skb_push(skb, tunnel_hdr_len); + if (greh->flags & GRE_SEQ) + hdr_len += GRE_HEADER_SECTION; -out: - skb_set_mac_header(skb, orig_mac_header); - skb_set_network_header(skb, orig_nw_header); - skb->protocol = htons(ETH_P_IP); + return hdr_len; } static bool check_checksum(struct sk_buff *skb) @@ -320,46 +221,61 @@ static bool check_checksum(struct sk_buff *skb) return (csum == 0); } +static u32 gre_flags_to_tunnel_flags(__be16 gre_flags, bool is_gre64) +{ + u32 tunnel_flags = 0; + + if (gre_flags & GRE_KEY || is_gre64) + tunnel_flags = OVS_TNL_F_KEY; + + if (gre_flags & GRE_CSUM) + tunnel_flags |= OVS_TNL_F_CSUM; + + return tunnel_flags; +} + /* Called with rcu_read_lock and BH disabled. 
*/ static int gre_rcv(struct sk_buff *skb) { + struct ovs_net *ovs_net; struct vport *vport; - const struct tnl_mutable_config *mutable; int hdr_len; struct iphdr *iph; - __be16 flags; + struct ovs_key_ipv4_tunnel tun_key; + __be16 gre_flags; + u32 tnl_flags; __be64 key; + bool is_gre64; if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN))) goto error; - if (unlikely(!check_checksum(skb))) goto error; - hdr_len = parse_header(ip_hdr(skb), &flags, &key); + hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &is_gre64); if (unlikely(hdr_len < 0)) goto error; - if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + if (is_gre64) + vport = rcu_dereference(ovs_net->vport_net.gre64_vport); + else + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + if (unlikely(!vport)) goto error; - iph = ip_hdr(skb); - vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, key, - TNL_T_PROTO_GRE, &mutable); - if (unlikely(!vport)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) goto error; - } - if (mutable->flags & TNL_F_IN_KEY_MATCH) - OVS_CB(skb)->tun_id = key; - else - OVS_CB(skb)->tun_id = 0; + iph = ip_hdr(skb); + tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64); + tnl_tun_key_init(&tun_key, iph, key, tnl_flags); + OVS_CB(skb)->tun_key = &tun_key; __skb_pull(skb, hdr_len); skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); - ovs_tnl_rcv(vport, skb, iph->tos); + ovs_tnl_rcv(vport, skb); return 0; error: @@ -367,31 +283,23 @@ error: return 0; } -static const struct tnl_ops gre_tnl_ops = { - .tunnel_type = TNL_T_PROTO_GRE, - .ipproto = IPPROTO_GRE, - .hdr_len = gre_hdr_len, - .build_header = gre_build_header, - .update_header = gre_update_header, -}; - -static struct vport *gre_create(const struct vport_parms *parms) -{ - return ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops); 
-} - static const struct net_protocol gre_protocol_handlers = { .handler = gre_rcv, - .err_handler = gre_err, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32) .netns_ok = 1, #endif }; +static bool inited; + static int gre_init(void) { int err; + if (inited) + return 0; + + inited = true; err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); if (err) pr_warn("cannot register gre protocol handler\n"); @@ -401,23 +309,101 @@ static int gre_init(void) static void gre_exit(void) { + if (!inited) + return; + + inited = false; + inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); } +/* GRE vport. */ +static const struct tnl_ops gre_tnl_ops = { + .ipproto = IPPROTO_GRE, + .hdr_len = gre_hdr_len, + .build_header = gre_build_header, +}; + +static struct vport *gre_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre_vport)) + return ERR_PTR(-EEXIST); + + vport = ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); + return vport; +} + +static void gre_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + ovs_tnl_destroy(vport); +} + const struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .flags = VPORT_F_TUN_ID, .init = gre_init, .exit = gre_exit, .create = gre_create, - .destroy = ovs_tnl_destroy, - .set_addr = ovs_tnl_set_addr, + .destroy = gre_tnl_destroy, + .get_name = ovs_tnl_get_name, + .send = ovs_tnl_send, +}; + +/* GRE64 vport. 
*/ +static const struct tnl_ops gre64_tnl_ops = { + .ipproto = IPPROTO_GRE, + .hdr_len = gre64_hdr_len, + .build_header = gre64_build_header, +}; + +static struct vport *gre64_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre64_vport)) + return ERR_PTR(-EEXIST); + + vport = ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops); + + rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport); + return vport; +} + + +static void gre64_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL); + ovs_tnl_destroy(vport); +} + +const struct vport_ops ovs_gre64_vport_ops = { + .type = OVS_VPORT_TYPE_GRE64, + .flags = VPORT_F_TUN_ID, + .init = gre_init, + .exit = gre_exit, + .create = gre64_create, + .destroy = gre64_tnl_destroy, .get_name = ovs_tnl_get_name, - .get_addr = ovs_tnl_get_addr, - .get_options = ovs_tnl_get_options, - .set_options = ovs_tnl_set_options, - .get_dev_flags = ovs_vport_gen_get_dev_flags, - .is_running = ovs_vport_gen_is_running, - .get_operstate = ovs_vport_gen_get_operstate, .send = ovs_tnl_send, };