X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-lisp.c;h=a1e2b2b69caa678249e3ceb259f83f2bb797719b;hb=HEAD;hp=0f013953025b3f4983752a618e8e985e10c1a5fc;hpb=a6ae068b7b3ea1e039279e35ffae6ceddfb73866;p=sliver-openvswitch.git diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index 0f0139530..a1e2b2b69 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -20,23 +20,23 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) #include #include -#include #include +#include #include #include #include +#include #include +#include #include "datapath.h" -#include "tunnel.h" +#include "gso.h" #include "vport.h" - /* * LISP encapsulation header: * @@ -94,34 +94,33 @@ struct lisphdr { #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) -static inline int lisp_hdr_len(const struct tnl_mutable_config *mutable, - const struct ovs_key_ipv4_tunnel *tun_key) -{ - return LISP_HLEN; -} - /** * struct lisp_port - Keeps track of open UDP ports - * @list: list element. - * @port: The UDP port number in network byte order. - * @socket: The socket created for this port number. - * @count: How many ports are using this socket/port. + * @dst_port: lisp UDP port no. + * @list: list element in @lisp_ports. + * @lisp_rcv_socket: The socket created for this port number. + * @name: vport name. */ struct lisp_port { + __be16 dst_port; struct list_head list; - __be16 port; struct socket *lisp_rcv_socket; - int count; + char name[IFNAMSIZ]; }; static LIST_HEAD(lisp_ports); -static struct lisp_port *lisp_port_exists(struct net *net, __be16 port) +static inline struct lisp_port *lisp_vport(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct lisp_port *lisp_find_port(struct net *net, __be16 port) { struct lisp_port *lisp_port; - list_for_each_entry(lisp_port, &lisp_ports, list) { - if (lisp_port->port == port && + list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { + if (lisp_port->dst_port == port && net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) return lisp_port; } @@ -134,25 +133,6 @@ static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) return (struct lisphdr *)(udp_hdr(skb) + 1); } -static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) -{ - int tnl_len; - int network_offset = skb_network_offset(skb); - - /* We only encapsulate IPv4 and IPv6 packets */ - switch (skb->protocol) { - case htons(ETH_P_IP): - case htons(ETH_P_IPV6): - /* Pop off "inner" Ethernet header */ - skb_pull(skb, network_offset); - tnl_len = ovs_tnl_send(vport, skb); - return tnl_len > 0 ? tnl_len + network_offset : tnl_len; - default: - kfree_skb(skb); - return 0; - } -} - /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) { @@ -180,22 +160,39 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid) #endif } -static struct sk_buff *lisp_build_header(const struct vport *vport, - const struct tnl_mutable_config *mutable, - struct dst_entry *dst, - struct sk_buff *skb, - int tunnel_hlen) +/* Compute source UDP port for outgoing packet. + * Currently we use the flow hash. + */ +static u16 get_src_port(struct net *net, struct sk_buff *skb) +{ + u32 hash = skb_get_hash(skb); + unsigned int range; + int high; + int low; + + if (!hash) { + struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; + + hash = jhash2((const u32 *)pkt_key, + sizeof(*pkt_key) / sizeof(u32), 0); + } + + inet_get_local_port_range(net, &low, &high); + range = (high - low) + 1; + return (((u64) hash * range) >> 32) + low; +} + +static void lisp_build_header(const struct vport *vport, + struct sk_buff *skb) { + struct net *net = ovs_dp_get_net(vport->dp); + struct lisp_port *lisp_port = lisp_vport(vport); struct udphdr *udph = udp_hdr(skb); struct lisphdr *lisph = (struct lisphdr *)(udph + 1); const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; - __be64 out_key; - u32 flags; - - tnl_get_param(mutable, tun_key, &flags, &out_key); - udph->dest = mutable->dst_port; - udph->source = htons(ovs_tnl_get_src_port(skb)); + udph->dest = lisp_port->dst_port; + udph->source = htons(get_src_port(net, skb)); udph->check = 0; udph->len = htons(skb->len - skb_transport_offset(skb)); @@ -210,60 +207,38 @@ static struct sk_buff *lisp_build_header(const struct vport *vport, lisph->u1.nonce[1] = 0; lisph->u1.nonce[2] = 0; - tunnel_id_to_instance_id(out_key, &lisph->u2.word2.instance_id[0]); + tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); lisph->u2.word2.locator_status_bits = 1; - - /* - * Allow our local IP stack to fragment the outer packet even if the - * DF bit is set as a last resort. We also need to force selection of - * an IP ID here because Linux will otherwise leave it at 0 if the - * packet originally had DF set. - */ - skb->local_df = 1; - __ip_select_ident(ip_hdr(skb), dst, 0); - - return skb; } /* Called with rcu_read_lock and BH disabled. */ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) { - struct vport *vport; + struct lisp_port *lisp_port; struct lisphdr *lisph; - const struct tnl_mutable_config *mutable; struct iphdr *iph, *inner_iph; struct ovs_key_ipv4_tunnel tun_key; __be64 key; - u32 tunnel_flags = 0; struct ethhdr *ethh; __be16 protocol; - if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) + lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); + if (unlikely(!lisp_port)) goto error; - lisph = lisp_hdr(skb); + if (iptunnel_pull_header(skb, LISP_HLEN, 0)) + goto error; - skb_pull_rcsum(skb, LISP_HLEN); + lisph = lisp_hdr(skb); if (lisph->instance_id_present != 1) key = 0; else key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); - iph = ip_hdr(skb); - vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, - key, TNL_T_PROTO_LISP, &mutable); - if (unlikely(!vport)) - goto error; - - if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr) - tunnel_flags = OVS_TNL_F_KEY; - else - key = 0; - /* Save outer tunnel values */ - tnl_tun_key_init(&tun_key, iph, key, tunnel_flags); - OVS_CB(skb)->tun_key = &tun_key; + iph = ip_hdr(skb); + ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); /* Drop non-IP inner packets */ inner_iph = (struct iphdr *)(lisph + 1); @@ -277,6 +252,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) default: goto error; } + skb->protocol = protocol; /* Add Ethernet header */ ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); @@ -285,7 +261,9 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) ethh->h_source[0] = 0x02; ethh->h_proto = protocol; - ovs_tnl_rcv(vport, skb); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + + ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key); goto out; error: @@ -298,8 +276,8 @@ out: #define UDP_ENCAP_LISP 1 static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) { - int err; struct sockaddr_in sin; + int err; err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &lisp_port->lisp_rcv_socket); @@ -311,7 +289,7 @@ static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = lisp_port->port; + sin.sin_port = lisp_port->dst_port; err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin, sizeof(struct sockaddr_in)); @@ -332,31 +310,39 @@ error: return err; } -static void lisp_tunnel_release(struct lisp_port *lisp_port) +static int lisp_get_options(const struct vport *vport, struct sk_buff *skb) { - lisp_port->count--; + struct lisp_port *lisp_port = lisp_vport(vport); - if (lisp_port->count == 0) { - /* Release old socket */ - sk_release_kernel(lisp_port->lisp_rcv_socket->sk); - list_del(&lisp_port->list); - kfree(lisp_port); - } + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port))) + return -EMSGSIZE; + return 0; +} + +static void lisp_tnl_destroy(struct vport *vport) +{ + struct lisp_port *lisp_port = lisp_vport(vport); + + list_del_rcu(&lisp_port->list); + /* Release socket */ + sk_release_kernel(lisp_port->lisp_rcv_socket->sk); + + ovs_vport_deferred_free(vport); } -static int lisp_tunnel_setup(struct net *net, struct nlattr *options, - struct lisp_port **lport) +static struct vport *lisp_tnl_create(const struct vport_parms *parms) { + struct net *net = ovs_dp_get_net(parms->dp); + struct nlattr *options = parms->options; + struct lisp_port *lisp_port; + struct vport *vport; struct nlattr *a; int err; u16 dst_port; - struct lisp_port *lisp_port = NULL; - - *lport = NULL; if (!options) { err = -EINVAL; - goto out; + goto error; } a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); @@ -365,124 +351,162 @@ static int lisp_tunnel_setup(struct net *net, struct nlattr *options, } else { /* Require destination port from userspace. */ err = -EINVAL; - goto out; + goto error; } /* Verify if we already have a socket created for this port */ - lisp_port = lisp_port_exists(net, htons(dst_port)); - if (lisp_port) { - lisp_port->count++; - err = 0; - *lport = lisp_port; - goto out; + if (lisp_find_port(net, htons(dst_port))) { + err = -EEXIST; + goto error; } - /* Add a new socket for this port */ - lisp_port = kzalloc(sizeof(struct lisp_port), GFP_KERNEL); - if (!lisp_port) { - err = -ENOMEM; - goto out; - } + vport = ovs_vport_alloc(sizeof(struct lisp_port), + &ovs_lisp_vport_ops, parms); + if (IS_ERR(vport)) + return vport; - lisp_port->port = htons(dst_port); - lisp_port->count = 1; - list_add_tail(&lisp_port->list, &lisp_ports); + lisp_port = lisp_vport(vport); + lisp_port->dst_port = htons(dst_port); + strncpy(lisp_port->name, parms->name, IFNAMSIZ); err = lisp_socket_init(lisp_port, net); if (err) - goto error; + goto error_free; - *lport = lisp_port; - goto out; + list_add_tail_rcu(&lisp_port->list, &lisp_ports); + return vport; +error_free: + ovs_vport_free(vport); error: - list_del(&lisp_port->list); - kfree(lisp_port); -out: - return err; + return ERR_PTR(err); } -static int lisp_tnl_set_options(struct vport *vport, struct nlattr *options) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) + +static void lisp_fix_segment(struct sk_buff *skb) { - int err; - struct net *net = ovs_dp_get_net(vport->dp); - struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct tnl_mutable_config *config; - struct lisp_port *old_port = NULL; - struct lisp_port *lisp_port = NULL; + struct udphdr *udph = udp_hdr(skb); - config = rtnl_dereference(tnl_vport->mutable); + udph->len = htons(skb->len - skb_transport_offset(skb)); +} - old_port = lisp_port_exists(net, config->dst_port); +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) + OVS_GSO_CB(skb)->fix_segment = lisp_fix_segment; + else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + return 0; +} +#else +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; - err = lisp_tunnel_setup(net, options, &lisp_port); - if (err) - goto out; + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + skb->encapsulation = 1; + return 0; +} +#endif - err = ovs_tnl_set_options(vport, options); +static int lisp_send(struct vport *vport, struct sk_buff *skb) +{ + int network_offset = skb_network_offset(skb); + struct rtable *rt; + int min_headroom; + __be32 saddr; + __be16 df; + int sent_len; + int err; - if (err) - lisp_tunnel_release(lisp_port); - else { - /* Release old socket */ - lisp_tunnel_release(old_port); + if (unlikely(!OVS_CB(skb)->tun_key)) + return -EINVAL; + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return 0; } -out: - return err; -} -static const struct tnl_ops ovs_lisp_tnl_ops = { - .tunnel_type = TNL_T_PROTO_LISP, - .ipproto = IPPROTO_UDP, - .hdr_len = lisp_hdr_len, - .build_header = lisp_build_header, -}; + /* Route lookup */ + saddr = OVS_CB(skb)->tun_key->ipv4_src; + rt = find_route(ovs_dp_get_net(vport->dp), + &saddr, + OVS_CB(skb)->tun_key->ipv4_dst, + IPPROTO_UDP, + OVS_CB(skb)->tun_key->ipv4_tos, + skb->mark); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } -static void lisp_tnl_destroy(struct vport *vport) -{ - struct lisp_port *lisp_port; - struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct tnl_mutable_config *config; + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + sizeof(struct iphdr) + LISP_HLEN; - config = rtnl_dereference(tnl_vport->mutable); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); - lisp_port = lisp_port_exists(ovs_dp_get_net(vport->dp), - config->dst_port); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } - lisp_tunnel_release(lisp_port); + /* Reset l2 headers. */ + skb_pull(skb, network_offset); + skb_reset_mac_header(skb); + vlan_set_tci(skb, 0); - ovs_tnl_destroy(vport); -} + skb_reset_inner_headers(skb); -static struct vport *lisp_tnl_create(const struct vport_parms *parms) -{ - int err; - struct vport *vport; - struct lisp_port *lisp_port = NULL; + __skb_push(skb, LISP_HLEN); + skb_reset_transport_header(skb); + + lisp_build_header(vport, skb); - err = lisp_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options, - &lisp_port); + /* Offloading */ + err = handle_offloads(skb); if (err) - return ERR_PTR(err); + goto err_free_rt; - vport = ovs_tnl_create(parms, &ovs_lisp_vport_ops, &ovs_lisp_tnl_ops); + skb->local_df = 1; - if (IS_ERR(vport)) - lisp_tunnel_release(lisp_port); + df = OVS_CB(skb)->tun_key->tun_flags & + TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + sent_len = iptunnel_xmit(rt, skb, + saddr, OVS_CB(skb)->tun_key->ipv4_dst, + IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df, false); - return vport; + return sent_len > 0 ? sent_len + network_offset : sent_len; + +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static const char *lisp_get_name(const struct vport *vport) +{ + struct lisp_port *lisp_port = lisp_vport(vport); + return lisp_port->name; } const struct vport_ops ovs_lisp_vport_ops = { .type = OVS_VPORT_TYPE_LISP, - .flags = VPORT_F_TUN_ID, .create = lisp_tnl_create, .destroy = lisp_tnl_destroy, - .get_name = ovs_tnl_get_name, - .get_options = ovs_tnl_get_options, - .set_options = lisp_tnl_set_options, - .send = lisp_tnl_send, + .get_name = lisp_get_name, + .get_options = lisp_get_options, + .send = lisp_send, }; -#else -#warning LISP tunneling will not be available on kernels before 2.6.26 -#endif /* Linux kernel < 2.6.26 */