X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-vxlan.c;h=d140c3b2c1c500cc084ffff3fdd181a6a96c90e5;hb=f40869bdf6feca4d3ff7c59a1fb1f7ac101bc967;hp=a2cbeb968ea756b0bbbc1049b9856d3e4694ccf0;hpb=5ca1ba484bd9ade5116a49cf241cb98219d7d696;p=sliver-openvswitch.git diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index a2cbeb968..d140c3b2c 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include @@ -50,34 +50,34 @@ struct vxlanhdr { #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) -static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable, - const struct ovs_key_ipv4_tunnel *tun_key) -{ - return VXLAN_HLEN; -} - /** * struct vxlan_port - Keeps track of open UDP ports - * @list: list element. - * @port: The UDP port number in network byte order. - * @socket: The socket created for this port number. - * @count: How many ports are using this socket/port. + * @dst_port: vxlan UDP port no. + * @list: list element in @vxlan_ports. + * @vxlan_rcv_socket: The socket created for this port number. + * @name: vport name. */ struct vxlan_port { + __be16 dst_port; struct list_head list; - __be16 port; struct socket *vxlan_rcv_socket; - int count; + char name[IFNAMSIZ]; }; static LIST_HEAD(vxlan_ports); -static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port) +static inline struct vxlan_port *vxlan_vport(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct vxlan_port *vxlan_find_port(struct net *net, __be16 port) { struct vxlan_port *vxlan_port; - list_for_each_entry(vxlan_port, &vxlan_ports, list) { - if (vxlan_port->port == port && + list_for_each_entry_rcu(vxlan_port, &vxlan_ports, list) { + + if (vxlan_port->dst_port == port && net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net)) return vxlan_port; } @@ -90,65 +90,36 @@ static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb) return (struct vxlanhdr *)(udp_hdr(skb) + 1); } -/* Compute source port for outgoing packet. - * Currently we use the flow hash. - */ -static u16 get_src_port(struct sk_buff *skb) -{ - int low; - int high; - unsigned int range; - u32 hash = OVS_CB(skb)->flow->hash; - - inet_get_local_port_range(&low, &high); - range = (high - low) + 1; - return (((u64) hash * range) >> 32) + low; -} - -static struct sk_buff *vxlan_build_header(const struct vport *vport, - const struct tnl_mutable_config *mutable, - struct dst_entry *dst, - struct sk_buff *skb, - int tunnel_hlen) +static void vxlan_build_header(const struct vport *vport, + struct sk_buff *skb, + int tunnel_hlen) { + struct vxlan_port *vxlan_port = vxlan_vport(vport); struct udphdr *udph = udp_hdr(skb); struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1); const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; - __be64 out_key; - u32 flags; - - tnl_get_param(mutable, tun_key, &flags, &out_key); - udph->dest = mutable->dst_port; - udph->source = htons(get_src_port(skb)); + udph->dest = vxlan_port->dst_port; + udph->source = htons(ovs_tnl_get_src_port(skb)); udph->check = 0; udph->len = htons(skb->len - skb_transport_offset(skb)); vxh->vx_flags = htonl(VXLAN_FLAGS); - vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8); - - /* - * Allow our local IP stack to fragment the outer packet even if the - * DF bit is set as a last resort. We also need to force selection of - * an IP ID here because Linux will otherwise leave it at 0 if the - * packet originally had DF set. - */ - skb->local_df = 1; - __ip_select_ident(ip_hdr(skb), dst, 0); - - return skb; + vxh->vx_vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); } /* Called with rcu_read_lock and BH disabled. */ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { - struct vport *vport; + struct vxlan_port *vxlan_vport; struct vxlanhdr *vxh; - const struct tnl_mutable_config *mutable; struct iphdr *iph; struct ovs_key_ipv4_tunnel tun_key; __be64 key; - u32 tunnel_flags = 0; + + vxlan_vport = vxlan_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); + if (unlikely(!vxlan_vport)) + goto error; if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN))) goto error; @@ -163,24 +134,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8); - iph = ip_hdr(skb); - vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, - key, TNL_T_PROTO_VXLAN, &mutable); - if (unlikely(!vport)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - goto error; - } - - if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr) - tunnel_flags = OVS_TNL_F_KEY; - else - key = 0; - /* Save outer tunnel values */ - tnl_tun_key_init(&tun_key, iph, key, tunnel_flags); - OVS_CB(skb)->tun_key = &tun_key; + iph = ip_hdr(skb); + tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY); - ovs_tnl_rcv(vport, skb); + ovs_tnl_rcv(vport_from_priv(vxlan_vport), skb, &tun_key); goto out; error: @@ -193,8 +151,8 @@ out: #define UDP_ENCAP_VXLAN 1 static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net) { - int err; struct sockaddr_in sin; + int err; err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &vxlan_port->vxlan_rcv_socket); @@ -206,7 +164,7 @@ static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net) sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = vxlan_port->port; + sin.sin_port = vxlan_port->dst_port; err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin, sizeof(struct sockaddr_in)); @@ -227,156 +185,99 @@ error: return err; } -static void vxlan_tunnel_release(struct vxlan_port *vxlan_port) +static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) { - vxlan_port->count--; + struct vxlan_port *vxlan_port = vxlan_vport(vport); - if (vxlan_port->count == 0) { - /* Release old socket */ - sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk); - list_del(&vxlan_port->list); - kfree(vxlan_port); - } + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(vxlan_port->dst_port))) + return -EMSGSIZE; + return 0; +} + +static void vxlan_tnl_destroy(struct vport *vport) +{ + struct vxlan_port *vxlan_port = vxlan_vport(vport); + + list_del_rcu(&vxlan_port->list); + /* Release socket */ + sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk); + + ovs_vport_deferred_free(vport); } -static int vxlan_tunnel_setup(struct net *net, struct nlattr *options, - struct vxlan_port **vxport) + +static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { + struct net *net = ovs_dp_get_net(parms->dp); + struct nlattr *options = parms->options; + struct vxlan_port *vxlan_port; + struct vport *vport; struct nlattr *a; int err; u16 dst_port; - struct vxlan_port *vxlan_port = NULL; - - *vxport = NULL; if (!options) { err = -EINVAL; - goto out; + goto error; } - a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); if (a && nla_len(a) == sizeof(u16)) { dst_port = nla_get_u16(a); } else { /* Require destination port from userspace. */ err = -EINVAL; - goto out; + goto error; } /* Verify if we already have a socket created for this port */ - vxlan_port = vxlan_port_exists(net, htons(dst_port)); - if (vxlan_port) { - vxlan_port->count++; - err = 0; - goto out; + if (vxlan_find_port(net, htons(dst_port))) { + err = -EEXIST; + goto error; } - /* Add a new socket for this port */ - vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL); - if (!vxlan_port) { - err = -ENOMEM; - goto out; - } + vport = ovs_vport_alloc(sizeof(struct vxlan_port), + &ovs_vxlan_vport_ops, parms); + if (IS_ERR(vport)) + return vport; - vxlan_port->port = htons(dst_port); - vxlan_port->count = 1; - list_add_tail(&vxlan_port->list, &vxlan_ports); + vxlan_port = vxlan_vport(vport); + vxlan_port->dst_port = htons(dst_port); + strncpy(vxlan_port->name, parms->name, IFNAMSIZ); err = vxlan_socket_init(vxlan_port, net); if (err) - goto error; + goto error_free; - *vxport = vxlan_port; - goto out; + list_add_tail_rcu(&vxlan_port->list, &vxlan_ports); + return vport; +error_free: + ovs_vport_free(vport); error: - list_del(&vxlan_port->list); - kfree(vxlan_port); -out: - return err; + return ERR_PTR(err); } -static int vxlan_set_options(struct vport *vport, struct nlattr *options) +static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { - int err; - struct net *net = ovs_dp_get_net(vport->dp); - struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct tnl_mutable_config *config; - struct vxlan_port *old_port = NULL; - struct vxlan_port *vxlan_port = NULL; - - config = rtnl_dereference(tnl_vport->mutable); + if (unlikely(!OVS_CB(skb)->tun_key)) + return -EINVAL; - old_port = vxlan_port_exists(net, config->dst_port); - - err = vxlan_tunnel_setup(net, options, &vxlan_port); - if (err) - goto out; - - err = ovs_tnl_set_options(vport, options); - - if (err) - vxlan_tunnel_release(vxlan_port); - else { - /* Release old socket */ - vxlan_tunnel_release(old_port); - } -out: - return err; + return ovs_tnl_send(vport, skb, IPPROTO_UDP, + VXLAN_HLEN, vxlan_build_header); } -static const struct tnl_ops ovs_vxlan_tnl_ops = { - .tunnel_type = TNL_T_PROTO_VXLAN, - .ipproto = IPPROTO_UDP, - .hdr_len = vxlan_hdr_len, - .build_header = vxlan_build_header, -}; - -static void vxlan_tnl_destroy(struct vport *vport) -{ - struct vxlan_port *vxlan_port; - struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct tnl_mutable_config *config; - - config = rtnl_dereference(tnl_vport->mutable); - - vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp), - config->dst_port); - - vxlan_tunnel_release(vxlan_port); - - ovs_tnl_destroy(vport); -} - -static struct vport *vxlan_tnl_create(const struct vport_parms *parms) +static const char *vxlan_get_name(const struct vport *vport) { - int err; - struct vport *vport; - struct vxlan_port *vxlan_port = NULL; - - err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options, - &vxlan_port); - if (err) - return ERR_PTR(err); - - vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops); - - if (IS_ERR(vport)) - vxlan_tunnel_release(vxlan_port); - - return vport; + struct vxlan_port *vxlan_port = vxlan_vport(vport); + return vxlan_port->name; } const struct vport_ops ovs_vxlan_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, - .flags = VPORT_F_TUN_ID, .create = vxlan_tnl_create, .destroy = vxlan_tnl_destroy, - .set_addr = ovs_tnl_set_addr, - .get_name = ovs_tnl_get_name, - .get_addr = ovs_tnl_get_addr, - .get_options = ovs_tnl_get_options, - .set_options = vxlan_set_options, - .send = ovs_tnl_send, + .get_name = vxlan_get_name, + .get_options = vxlan_get_options, + .send = vxlan_tnl_send, }; #else #warning VXLAN tunneling will not be available on kernels before 2.6.26