X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-lisp.c;h=a1e2b2b69caa678249e3ceb259f83f2bb797719b;hb=e2f3178f0582eda302bdc5629189b6a56d9fbcdd;hp=80e980a228c69fcc332be50ce5e56e8b18c11c61;hpb=a515e4a8addcb961d43f8020d6a376a98da5a8c8;p=sliver-openvswitch.git diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index 80e980a22..a1e2b2b69 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -20,7 +20,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) #include #include @@ -35,6 +34,7 @@ #include #include "datapath.h" +#include "gso.h" #include "vport.h" /* @@ -163,31 +163,36 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid) /* Compute source UDP port for outgoing packet. * Currently we use the flow hash. */ -static u16 ovs_tnl_get_src_port(struct sk_buff *skb) +static u16 get_src_port(struct net *net, struct sk_buff *skb) { - int low; - int high; + u32 hash = skb_get_hash(skb); unsigned int range; - struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; - u32 hash = jhash2((const u32 *)pkt_key, - sizeof(*pkt_key) / sizeof(u32), 0); + int high; + int low; + + if (!hash) { + struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; - inet_get_local_port_range(&low, &high); + hash = jhash2((const u32 *)pkt_key, + sizeof(*pkt_key) / sizeof(u32), 0); + } + + inet_get_local_port_range(net, &low, &high); range = (high - low) + 1; return (((u64) hash * range) >> 32) + low; } static void lisp_build_header(const struct vport *vport, - struct sk_buff *skb, - int tunnel_hlen) + struct sk_buff *skb) { + struct net *net = ovs_dp_get_net(vport->dp); struct lisp_port *lisp_port = lisp_vport(vport); struct udphdr *udph = udp_hdr(skb); struct lisphdr *lisph = (struct lisphdr *)(udph + 1); const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; udph->dest = lisp_port->dst_port; - udph->source = htons(ovs_tnl_get_src_port(skb)); + udph->source = htons(get_src_port(net, skb)); udph->check = 0; udph->len = htons(skb->len - skb_transport_offset(skb)); @@ -206,47 +211,6 @@ static void lisp_build_header(const struct vport *vport, lisph->u2.word2.locator_status_bits = 1; } -/** - * ovs_tnl_rcv - ingress point for generic tunnel code - * - * @vport: port this packet was received on - * @skb: received packet - * @tun_key: tunnel that carried packet - * - * Must be called with rcu_read_lock. - * - * Packets received by this function are in the following state: - * - skb->data points to the inner Ethernet header. - * - The inner Ethernet header is in the linear data area. - * - The layer pointers are undefined. - */ -static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, - struct ovs_key_ipv4_tunnel *tun_key) -{ - struct ethhdr *eh; - - skb_reset_mac_header(skb); - eh = eth_hdr(skb); - - if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) - skb->protocol = eh->h_proto; - else - skb->protocol = htons(ETH_P_802_2); - - skb_dst_drop(skb); - nf_reset(skb); - skb_clear_rxhash(skb); - secpath_reset(skb); - vlan_set_tci(skb, 0); - - if (unlikely(compute_ip_summed(skb, false))) { - kfree_skb(skb); - return; - } - - ovs_vport_receive(vport, skb, tun_key); -} - /* Called with rcu_read_lock and BH disabled. */ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) { @@ -262,13 +226,11 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) if (unlikely(!lisp_port)) goto error; - if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) + if (iptunnel_pull_header(skb, LISP_HLEN, 0)) goto error; lisph = lisp_hdr(skb); - skb_pull_rcsum(skb, LISP_HLEN); - if (lisph->instance_id_present != 1) key = 0; else @@ -290,6 +252,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) default: goto error; } + skb->protocol = protocol; /* Add Ethernet header */ ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); @@ -300,7 +263,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); + ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key); goto out; error: @@ -419,106 +382,74 @@ error: return ERR_PTR(err); } -static bool need_linearize(const struct sk_buff *skb) -{ - int i; - - if (unlikely(skb_shinfo(skb)->frag_list)) - return true; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) - /* - * Generally speaking we should linearize if there are paged frags. - * However, if all of the refcounts are 1 we know nobody else can - * change them from underneath us and we can skip the linearization. - */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) - return true; +static void lisp_fix_segment(struct sk_buff *skb) +{ + struct udphdr *udph = udp_hdr(skb); - return false; + udph->len = htons(skb->len - skb_transport_offset(skb)); } -static struct sk_buff *handle_offloads(struct sk_buff *skb) +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) + OVS_GSO_CB(skb)->fix_segment = lisp_fix_segment; + else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + return 0; +} +#else +static int handle_offloads(struct sk_buff *skb) { - int err; - - forward_ip_summed(skb, true); - - if (skb_is_gso(skb)) { - struct sk_buff *nskb; - char cb[sizeof(skb->cb)]; - - memcpy(cb, skb->cb, sizeof(cb)); - - nskb = __skb_gso_segment(skb, 0, false); - if (IS_ERR(nskb)) { - err = PTR_ERR(nskb); - goto error; - } - - consume_skb(skb); - skb = nskb; - while (nskb) { - memcpy(nskb->cb, cb, sizeof(cb)); - nskb = nskb->next; - } - } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { - /* Pages aren't locked and could change at any time. - * If this happens after we compute the checksum, the - * checksum will be wrong. We linearize now to avoid - * this problem. - */ - if (unlikely(need_linearize(skb))) { - err = __skb_linearize(skb); - if (unlikely(err)) - goto error; - } - - err = skb_checksum_help(skb); + int err = skb_unclone(skb, GFP_ATOMIC); if (unlikely(err)) - goto error; - } + return err; - set_ip_summed(skb, OVS_CSUM_NONE); + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; - return skb; - -error: - return ERR_PTR(err); + skb->encapsulation = 1; + return 0; } +#endif -static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, - u8 ipproto, int tunnel_hlen, - void (*build_header)(const struct vport *, - struct sk_buff *, - int tunnel_hlen)) +static int lisp_send(struct vport *vport, struct sk_buff *skb) { - int min_headroom; + int network_offset = skb_network_offset(skb); struct rtable *rt; + int min_headroom; __be32 saddr; - int sent_len = 0; + __be16 df; + int sent_len; int err; - struct sk_buff *nskb; + + if (unlikely(!OVS_CB(skb)->tun_key)) + return -EINVAL; + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return 0; + } /* Route lookup */ saddr = OVS_CB(skb)->tun_key->ipv4_src; rt = find_route(ovs_dp_get_net(vport->dp), &saddr, OVS_CB(skb)->tun_key->ipv4_dst, - ipproto, + IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos, - skb_get_mark(skb)); + skb->mark); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; } - tunnel_hlen += sizeof(struct iphdr); - min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len - + tunnel_hlen - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + sizeof(struct iphdr) + LISP_HLEN; if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - @@ -531,77 +462,33 @@ static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, goto err_free_rt; } + /* Reset l2 headers. */ + skb_pull(skb, network_offset); + skb_reset_mac_header(skb); + vlan_set_tci(skb, 0); + + skb_reset_inner_headers(skb); + + __skb_push(skb, LISP_HLEN); + skb_reset_transport_header(skb); + + lisp_build_header(vport, skb); + /* Offloading */ - nskb = handle_offloads(skb); - if (IS_ERR(nskb)) { - err = PTR_ERR(nskb); + err = handle_offloads(skb); + if (err) goto err_free_rt; - } - skb = nskb; - - /* Reset SKB */ - nf_reset(skb); - secpath_reset(skb); - skb_dst_drop(skb); - skb_clear_rxhash(skb); - - while (skb) { - struct sk_buff *next_skb = skb->next; - struct iphdr *iph; - int frag_len; - - skb->next = NULL; - - if (unlikely(vlan_deaccel_tag(skb))) - goto next; - - frag_len = skb->len; - skb_push(skb, tunnel_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - - if (next_skb) - skb_dst_set(skb, dst_clone(&rt_dst(rt))); - else - skb_dst_set(skb, &rt_dst(rt)); - - /* Push Tunnel header. */ - build_header(vport, skb, tunnel_hlen); - - /* Push IP header. */ - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->protocol = ipproto; - iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; - iph->saddr = saddr; - iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; - iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; - iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & + + skb->local_df = 1; + + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - /* - * Allow our local IP stack to fragment the outer packet even - * if the DF bit is set as a last resort. We also need to - * force selection of an IP ID here with __ip_select_ident(), - * as ip_select_ident() assumes a proper ID is not needed when - * when the DF bit is set. - */ - skb->local_df = 1; - __ip_select_ident(iph, skb_dst(skb), 0); - - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - - err = ip_local_out(skb); - if (unlikely(net_xmit_eval(err))) - goto next; - - sent_len += frag_len; - -next: - skb = next_skb; - } + sent_len = iptunnel_xmit(rt, skb, + saddr, OVS_CB(skb)->tun_key->ipv4_dst, + IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df, false); - return sent_len; + return sent_len > 0 ? sent_len + network_offset : sent_len; err_free_rt: ip_rt_put(rt); @@ -609,29 +496,6 @@ error: return err; } -static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) -{ - int tnl_len; - int network_offset = skb_network_offset(skb); - - if (unlikely(!OVS_CB(skb)->tun_key)) - return -EINVAL; - - /* We only encapsulate IPv4 and IPv6 packets */ - switch (skb->protocol) { - case htons(ETH_P_IP): - case htons(ETH_P_IPV6): - /* Pop off "inner" Ethernet header */ - skb_pull(skb, network_offset); - tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP, - LISP_HLEN, lisp_build_header); - return tnl_len > 0 ? tnl_len + network_offset : tnl_len; - default: - kfree_skb(skb); - return 0; - } -} - static const char *lisp_get_name(const struct vport *vport) { struct lisp_port *lisp_port = lisp_vport(vport); @@ -644,8 +508,5 @@ const struct vport_ops ovs_lisp_vport_ops = { .destroy = lisp_tnl_destroy, .get_name = lisp_get_name, .get_options = lisp_get_options, - .send = lisp_tnl_send, + .send = lisp_send, }; -#else -#warning LISP tunneling will not be available on kernels before 2.6.26 -#endif /* Linux kernel < 2.6.26 */