X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-lisp.c;h=2f62d1193952e56490f4c5da8d94946688215f6a;hb=77d14d9c7f9ce7245eff56aacd420646577892d0;hp=d45a4cc2d746cd321450df22eb3b10448fccc75f;hpb=c405d282fb7dda49b0aaac12a30121e5f2762b16;p=sliver-openvswitch.git diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index d45a4cc2d..2f62d1193 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -30,13 +30,13 @@ #include #include +#include #include +#include #include "datapath.h" -#include "tunnel.h" #include "vport.h" - /* * LISP encapsulation header: * @@ -160,6 +160,23 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid) #endif } +/* Compute source UDP port for outgoing packet. + * Currently we use the flow hash. + */ +static u16 ovs_tnl_get_src_port(struct sk_buff *skb) +{ + int low; + int high; + unsigned int range; + struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; + u32 hash = jhash2((const u32 *)pkt_key, + sizeof(*pkt_key) / sizeof(u32), 0); + + inet_get_local_port_range(&low, &high); + range = (high - low) + 1; + return (((u64) hash * range) >> 32) + low; +} + static void lisp_build_header(const struct vport *vport, struct sk_buff *skb, int tunnel_hlen) @@ -189,6 +206,48 @@ static void lisp_build_header(const struct vport *vport, lisph->u2.word2.locator_status_bits = 1; } +/** + * ovs_tnl_rcv - ingress point for generic tunnel code + * + * @vport: port this packet was received on + * @skb: received packet + * @tos: ToS from encapsulating IP packet, used to copy ECN bits + * + * Must be called with rcu_read_lock. + * + * Packets received by this function are in the following state: + * - skb->data points to the inner Ethernet header. + * - The inner Ethernet header is in the linear data area. + * - skb->csum does not include the inner Ethernet header. + * - The layer pointers are undefined. + */ +static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct ethhdr *eh; + + skb_reset_mac_header(skb); + eh = eth_hdr(skb); + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + skb_dst_drop(skb); + nf_reset(skb); + skb_clear_rxhash(skb); + secpath_reset(skb); + vlan_set_tci(skb, 0); + + if (unlikely(compute_ip_summed(skb, false))) { + kfree_skb(skb); + return; + } + + ovs_vport_receive(vport, skb, tun_key); +} + /* Called with rcu_read_lock and BH disabled. */ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) { @@ -218,8 +277,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) /* Save outer tunnel values */ iph = ip_hdr(skb); - tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY); - OVS_CB(skb)->tun_key = &tun_key; + ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); /* Drop non-IP inner packets */ inner_iph = (struct iphdr *)(lisph + 1); @@ -241,7 +299,9 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) ethh->h_source[0] = 0x02; ethh->h_proto = protocol; - ovs_tnl_rcv(vport_from_priv(lisp_port), skb); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + + ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); goto out; error: @@ -360,15 +420,203 @@ error: return ERR_PTR(err); } +static bool need_linearize(const struct sk_buff *skb) +{ + int i; + + if (unlikely(skb_shinfo(skb)->frag_list)) + return true; + + /* + * Generally speaking we should linearize if there are paged frags. + * However, if all of the refcounts are 1 we know nobody else can + * change them from underneath us and we can skip the linearization. + */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) + return true; + + return false; +} + +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + forward_ip_summed(skb, true); + + + if (skb_is_gso(skb)) { + struct sk_buff *nskb; + char cb[sizeof(skb->cb)]; + + memcpy(cb, skb->cb, sizeof(cb)); + + nskb = __skb_gso_segment(skb, 0, false); + if (IS_ERR(nskb)) { + err = PTR_ERR(nskb); + goto error; + } + + consume_skb(skb); + skb = nskb; + while (nskb) { + memcpy(nskb->cb, cb, sizeof(cb)); + nskb = nskb->next; + } + } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { + /* Pages aren't locked and could change at any time. + * If this happens after we compute the checksum, the + * checksum will be wrong. We linearize now to avoid + * this problem. + */ + if (unlikely(need_linearize(skb))) { + err = __skb_linearize(skb); + if (unlikely(err)) + goto error; + } + + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + + set_ip_summed(skb, OVS_CSUM_NONE); + + return skb; + +error: + return ERR_PTR(err); +} + +static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, + u8 ipproto, int tunnel_hlen, + void (*build_header)(const struct vport *, + struct sk_buff *, + int tunnel_hlen)) +{ + int min_headroom; + struct rtable *rt; + __be32 saddr; + int sent_len = 0; + int err; + struct sk_buff *nskb; + + /* Route lookup */ + saddr = OVS_CB(skb)->tun_key->ipv4_src; + rt = find_route(ovs_dp_get_net(vport->dp), + &saddr, + OVS_CB(skb)->tun_key->ipv4_dst, + ipproto, + OVS_CB(skb)->tun_key->ipv4_tos, + skb_get_mark(skb)); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } + + tunnel_hlen += sizeof(struct iphdr); + + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + tunnel_hlen + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + /* Offloading */ + nskb = handle_offloads(skb); + if (IS_ERR(nskb)) { + err = PTR_ERR(nskb); + goto err_free_rt; + } + skb = nskb; + + /* Reset SKB */ + nf_reset(skb); + secpath_reset(skb); + skb_dst_drop(skb); + skb_clear_rxhash(skb); + + while (skb) { + struct sk_buff *next_skb = skb->next; + struct iphdr *iph; + int frag_len; + + skb->next = NULL; + + if (unlikely(vlan_deaccel_tag(skb))) + goto next; + + frag_len = skb->len; + skb_push(skb, tunnel_hlen); + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(struct iphdr)); + + if (next_skb) + skb_dst_set(skb, dst_clone(&rt_dst(rt))); + else + skb_dst_set(skb, &rt_dst(rt)); + + /* Push Tunnel header. */ + build_header(vport, skb, tunnel_hlen); + + /* Push IP header. */ + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->protocol = ipproto; + iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; + iph->saddr = saddr; + iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; + iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; + iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & + TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + /* + * Allow our local IP stack to fragment the outer packet even + * if the DF bit is set as a last resort. We also need to + * force selection of an IP ID here with __ip_select_ident(), + * as ip_select_ident() assumes a proper ID is not needed when + * when the DF bit is set. + */ + skb->local_df = 1; + __ip_select_ident(iph, skb_dst(skb), 0); + + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + goto next; + + sent_len += frag_len; + +next: + skb = next_skb; + } + + return sent_len; + +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) { int tnl_len; int network_offset = skb_network_offset(skb); - if (unlikely(!OVS_CB(skb)->tun_key)) { - ovs_vport_record_error(vport, VPORT_E_TX_ERROR); - goto free; - } + if (unlikely(!OVS_CB(skb)->tun_key)) + return -EINVAL; /* We only encapsulate IPv4 and IPv6 packets */ switch (skb->protocol) { @@ -380,13 +628,9 @@ static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) LISP_HLEN, lisp_build_header); return tnl_len > 0 ? tnl_len + network_offset : tnl_len; default: - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); - goto free; + kfree_skb(skb); + return 0; } - -free: - kfree_skb(skb); - return 0; } static const char *lisp_get_name(const struct vport *vport) @@ -397,7 +641,6 @@ static const char *lisp_get_name(const struct vport *vport) const struct vport_ops ovs_lisp_vport_ops = { .type = OVS_VPORT_TYPE_LISP, - .flags = VPORT_F_TUN_ID, .create = lisp_tnl_create, .destroy = lisp_tnl_destroy, .get_name = lisp_get_name,