From: Pravin B Shelar Date: Tue, 13 Aug 2013 07:19:53 +0000 (-0700) Subject: datapath: Move generic tunnel functions to lisp module. X-Git-Tag: sliver-openvswitch-2.0.90-1~27^2 X-Git-Url: http://git.onelab.eu/?p=sliver-openvswitch.git;a=commitdiff_plain;h=11aa8dff934c06a24ec23556c51122025c2232fa datapath: Move generic tunnel functions to lisp module. Generic tunnel rcv and send function are only used by lisp tunneling module, so It make sense to move them to lisp module. CC: Lori Jakab Signed-off-by: Pravin B Shelar Acked-by: Lorand Jakab Acked-by: Jesse Gross --- diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 2ce888894..ccf4dfac0 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -12,7 +12,6 @@ openvswitch_sources = \ datapath.c \ dp_notify.c \ flow.c \ - tunnel.c \ vlan.c \ vport.c \ vport-gre.c \ @@ -26,7 +25,6 @@ openvswitch_headers = \ compat.h \ datapath.h \ flow.h \ - tunnel.h \ vlan.h \ vport.h \ vport-internal_dev.h \ diff --git a/datapath/datapath.c b/datapath/datapath.c index a6bf70ca9..48f17c091 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -58,7 +58,6 @@ #include "datapath.h" #include "flow.h" #include "vlan.h" -#include "tunnel.h" #include "vport-internal_dev.h" #include "vport-netdev.h" diff --git a/datapath/datapath.h b/datapath/datapath.h index eda87fdd3..064211dbc 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -29,7 +29,6 @@ #include "checksum.h" #include "compat.h" #include "flow.h" -#include "tunnel.h" #include "vlan.h" #include "vport.h" diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index ec573a51f..d5308933f 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -18,6 +18,8 @@ #include #include +#include +#include #include "datapath.h" #include "vport-internal_dev.h" diff --git a/datapath/tunnel.c b/datapath/tunnel.c deleted file mode 100644 index c2c3df4c2..000000000 --- a/datapath/tunnel.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (c) 2007-2012 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "checksum.h" -#include "compat.h" -#include "datapath.h" -#include "tunnel.h" -#include "vlan.h" -#include "vport.h" - -/** - * ovs_tnl_rcv - ingress point for generic tunnel code - * - * @vport: port this packet was received on - * @skb: received packet - * @tos: ToS from encapsulating IP packet, used to copy ECN bits - * - * Must be called with rcu_read_lock. - * - * Packets received by this function are in the following state: - * - skb->data points to the inner Ethernet header. - * - The inner Ethernet header is in the linear data area. - * - skb->csum does not include the inner Ethernet header. - * - The layer pointers are undefined. - */ -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, - struct ovs_key_ipv4_tunnel *tun_key) -{ - struct ethhdr *eh; - - skb_reset_mac_header(skb); - eh = eth_hdr(skb); - - if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) - skb->protocol = eh->h_proto; - else - skb->protocol = htons(ETH_P_802_2); - - skb_dst_drop(skb); - nf_reset(skb); - skb_clear_rxhash(skb); - secpath_reset(skb); - vlan_set_tci(skb, 0); - - if (unlikely(compute_ip_summed(skb, false))) { - kfree_skb(skb); - return; - } - - ovs_vport_receive(vport, skb, tun_key); -} - -static bool need_linearize(const struct sk_buff *skb) -{ - int i; - - if (unlikely(skb_shinfo(skb)->frag_list)) - return true; - - /* - * Generally speaking we should linearize if there are paged frags. - * However, if all of the refcounts are 1 we know nobody else can - * change them from underneath us and we can skip the linearization. - */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) - return true; - - return false; -} - -static struct sk_buff *handle_offloads(struct sk_buff *skb) -{ - int err; - - forward_ip_summed(skb, true); - - if (skb_is_gso(skb)) { - struct sk_buff *nskb; - char cb[sizeof(skb->cb)]; - - memcpy(cb, skb->cb, sizeof(cb)); - - nskb = __skb_gso_segment(skb, 0, false); - if (IS_ERR(nskb)) { - err = PTR_ERR(nskb); - goto error; - } - - consume_skb(skb); - skb = nskb; - while (nskb) { - memcpy(nskb->cb, cb, sizeof(cb)); - nskb = nskb->next; - } - } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { - /* Pages aren't locked and could change at any time. - * If this happens after we compute the checksum, the - * checksum will be wrong. We linearize now to avoid - * this problem. - */ - if (unlikely(need_linearize(skb))) { - err = __skb_linearize(skb); - if (unlikely(err)) - goto error; - } - - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } - - set_ip_summed(skb, OVS_CSUM_NONE); - - return skb; - -error: - return ERR_PTR(err); -} - -/* Compute source UDP port for outgoing packet. - * Currently we use the flow hash. - */ -u16 ovs_tnl_get_src_port(struct sk_buff *skb) -{ - int low; - int high; - unsigned int range; - struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; - u32 hash = jhash2((const u32 *)pkt_key, - sizeof(*pkt_key) / sizeof(u32), 0); - - inet_get_local_port_range(&low, &high); - range = (high - low) + 1; - return (((u64) hash * range) >> 32) + low; -} - -int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, - u8 ipproto, int tunnel_hlen, - void (*build_header)(const struct vport *, - struct sk_buff *, - int tunnel_hlen)) -{ - int min_headroom; - struct rtable *rt; - __be32 saddr; - int sent_len = 0; - int err; - struct sk_buff *nskb; - - /* Route lookup */ - saddr = OVS_CB(skb)->tun_key->ipv4_src; - rt = find_route(ovs_dp_get_net(vport->dp), - &saddr, - OVS_CB(skb)->tun_key->ipv4_dst, - ipproto, - OVS_CB(skb)->tun_key->ipv4_tos, - skb_get_mark(skb)); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto error; - } - - tunnel_hlen += sizeof(struct iphdr); - - min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len - + tunnel_hlen - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); - - if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { - int head_delta = SKB_DATA_ALIGN(min_headroom - - skb_headroom(skb) + - 16); - - err = pskb_expand_head(skb, max_t(int, head_delta, 0), - 0, GFP_ATOMIC); - if (unlikely(err)) - goto err_free_rt; - } - - /* Offloading */ - nskb = handle_offloads(skb); - if (IS_ERR(nskb)) { - err = PTR_ERR(nskb); - goto err_free_rt; - } - skb = nskb; - - /* Reset SKB */ - nf_reset(skb); - secpath_reset(skb); - skb_dst_drop(skb); - skb_clear_rxhash(skb); - - while (skb) { - struct sk_buff *next_skb = skb->next; - struct iphdr *iph; - int frag_len; - - skb->next = NULL; - - if (unlikely(vlan_deaccel_tag(skb))) - goto next; - - frag_len = skb->len; - skb_push(skb, tunnel_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - - if (next_skb) - skb_dst_set(skb, dst_clone(&rt_dst(rt))); - else - skb_dst_set(skb, &rt_dst(rt)); - - /* Push Tunnel header. */ - build_header(vport, skb, tunnel_hlen); - - /* Push IP header. */ - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->protocol = ipproto; - iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; - iph->saddr = saddr; - iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; - iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; - iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & - TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - /* - * Allow our local IP stack to fragment the outer packet even - * if the DF bit is set as a last resort. We also need to - * force selection of an IP ID here with __ip_select_ident(), - * as ip_select_ident() assumes a proper ID is not needed when - * when the DF bit is set. - */ - skb->local_df = 1; - __ip_select_ident(iph, skb_dst(skb), 0); - - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - - err = ip_local_out(skb); - if (unlikely(net_xmit_eval(err))) - goto next; - - sent_len += frag_len; - -next: - skb = next_skb; - } - - return sent_len; - -err_free_rt: - ip_rt_put(rt); -error: - return err; -} diff --git a/datapath/tunnel.h b/datapath/tunnel.h deleted file mode 100644 index 7625dbe66..000000000 --- a/datapath/tunnel.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2007-2012 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef TUNNEL_H -#define TUNNEL_H 1 - -#include -#include -#include - -#include "flow.h" -#include "vport.h" - -u16 ovs_tnl_get_src_port(struct sk_buff *skb); - -int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, - u8 ipproto, int tunnel_hlen, - void (*build_header)(const struct vport *, - struct sk_buff *, - int tunnel_hlen)); - -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, - struct ovs_key_ipv4_tunnel *tun_key); - -#endif /* TUNNEL_H */ diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 9bba42a41..5af6dbec7 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -35,10 +35,11 @@ #include #include #include +#include +#include #include #include - #include #include #include @@ -46,7 +47,6 @@ #include #include "datapath.h" -#include "tunnel.h" #include "vport.h" /* Returns the least-significant 32 bits of a __be64. */ diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index 6027a164e..2f62d1193 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -30,13 +30,13 @@ #include #include +#include #include +#include #include "datapath.h" -#include "tunnel.h" #include "vport.h" - /* * LISP encapsulation header: * @@ -160,6 +160,23 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid) #endif } +/* Compute source UDP port for outgoing packet. + * Currently we use the flow hash. + */ +static u16 ovs_tnl_get_src_port(struct sk_buff *skb) +{ + int low; + int high; + unsigned int range; + struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; + u32 hash = jhash2((const u32 *)pkt_key, + sizeof(*pkt_key) / sizeof(u32), 0); + + inet_get_local_port_range(&low, &high); + range = (high - low) + 1; + return (((u64) hash * range) >> 32) + low; +} + static void lisp_build_header(const struct vport *vport, struct sk_buff *skb, int tunnel_hlen) @@ -189,6 +206,48 @@ static void lisp_build_header(const struct vport *vport, lisph->u2.word2.locator_status_bits = 1; } +/** + * ovs_tnl_rcv - ingress point for generic tunnel code + * + * @vport: port this packet was received on + * @skb: received packet + * @tos: ToS from encapsulating IP packet, used to copy ECN bits + * + * Must be called with rcu_read_lock. + * + * Packets received by this function are in the following state: + * - skb->data points to the inner Ethernet header. + * - The inner Ethernet header is in the linear data area. + * - skb->csum does not include the inner Ethernet header. + * - The layer pointers are undefined. + */ +static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct ethhdr *eh; + + skb_reset_mac_header(skb); + eh = eth_hdr(skb); + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + skb_dst_drop(skb); + nf_reset(skb); + skb_clear_rxhash(skb); + secpath_reset(skb); + vlan_set_tci(skb, 0); + + if (unlikely(compute_ip_summed(skb, false))) { + kfree_skb(skb); + return; + } + + ovs_vport_receive(vport, skb, tun_key); +} + /* Called with rcu_read_lock and BH disabled. */ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) { @@ -361,6 +420,196 @@ error: return ERR_PTR(err); } +static bool need_linearize(const struct sk_buff *skb) +{ + int i; + + if (unlikely(skb_shinfo(skb)->frag_list)) + return true; + + /* + * Generally speaking we should linearize if there are paged frags. + * However, if all of the refcounts are 1 we know nobody else can + * change them from underneath us and we can skip the linearization. + */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) + return true; + + return false; +} + +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + forward_ip_summed(skb, true); + + + if (skb_is_gso(skb)) { + struct sk_buff *nskb; + char cb[sizeof(skb->cb)]; + + memcpy(cb, skb->cb, sizeof(cb)); + + nskb = __skb_gso_segment(skb, 0, false); + if (IS_ERR(nskb)) { + err = PTR_ERR(nskb); + goto error; + } + + consume_skb(skb); + skb = nskb; + while (nskb) { + memcpy(nskb->cb, cb, sizeof(cb)); + nskb = nskb->next; + } + } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { + /* Pages aren't locked and could change at any time. + * If this happens after we compute the checksum, the + * checksum will be wrong. We linearize now to avoid + * this problem. + */ + if (unlikely(need_linearize(skb))) { + err = __skb_linearize(skb); + if (unlikely(err)) + goto error; + } + + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + + set_ip_summed(skb, OVS_CSUM_NONE); + + return skb; + +error: + return ERR_PTR(err); +} + +static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, + u8 ipproto, int tunnel_hlen, + void (*build_header)(const struct vport *, + struct sk_buff *, + int tunnel_hlen)) +{ + int min_headroom; + struct rtable *rt; + __be32 saddr; + int sent_len = 0; + int err; + struct sk_buff *nskb; + + /* Route lookup */ + saddr = OVS_CB(skb)->tun_key->ipv4_src; + rt = find_route(ovs_dp_get_net(vport->dp), + &saddr, + OVS_CB(skb)->tun_key->ipv4_dst, + ipproto, + OVS_CB(skb)->tun_key->ipv4_tos, + skb_get_mark(skb)); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } + + tunnel_hlen += sizeof(struct iphdr); + + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + tunnel_hlen + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + /* Offloading */ + nskb = handle_offloads(skb); + if (IS_ERR(nskb)) { + err = PTR_ERR(nskb); + goto err_free_rt; + } + skb = nskb; + + /* Reset SKB */ + nf_reset(skb); + secpath_reset(skb); + skb_dst_drop(skb); + skb_clear_rxhash(skb); + + while (skb) { + struct sk_buff *next_skb = skb->next; + struct iphdr *iph; + int frag_len; + + skb->next = NULL; + + if (unlikely(vlan_deaccel_tag(skb))) + goto next; + + frag_len = skb->len; + skb_push(skb, tunnel_hlen); + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(struct iphdr)); + + if (next_skb) + skb_dst_set(skb, dst_clone(&rt_dst(rt))); + else + skb_dst_set(skb, &rt_dst(rt)); + + /* Push Tunnel header. */ + build_header(vport, skb, tunnel_hlen); + + /* Push IP header. */ + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->protocol = ipproto; + iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; + iph->saddr = saddr; + iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; + iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; + iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & + TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + /* + * Allow our local IP stack to fragment the outer packet even + * if the DF bit is set as a last resort. We also need to + * force selection of an IP ID here with __ip_select_ident(), + * as ip_select_ident() assumes a proper ID is not needed when + * when the DF bit is set. + */ + skb->local_df = 1; + __ip_select_ident(iph, skb_dst(skb), 0); + + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + goto next; + + sent_len += frag_len; + +next: + skb = next_skb; + } + + return sent_len; + +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) { int tnl_len; diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index 8fdec1678..f3ef94707 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -42,7 +42,6 @@ #include #include "datapath.h" -#include "tunnel.h" #include "vport.h" #define OVS_VXLAN_RCV_PRIORITY 8