From a109c9fbf55b83e1ac6a3f6805cc1945fc41e91c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 26 Aug 2013 11:05:35 -0700 Subject: [PATCH] datapath: Sync vxlan tunneling code with upstream ovs-vxlan. The upstream vxlan implementation was changed in response to a few comments. This patch brings those changes back to the out-of-tree ovs module. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross --- datapath/linux/compat/include/net/vxlan.h | 33 ++-- datapath/linux/compat/vxlan.c | 177 +++++++--------------- datapath/vport-vxlan.c | 36 ++--- 3 files changed, 82 insertions(+), 164 deletions(-) diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h index 102bc0c60..46cbfb603 100644 --- a/datapath/linux/compat/include/net/vxlan.h +++ b/datapath/linux/compat/include/net/vxlan.h @@ -5,35 +5,26 @@ #include #include +struct vxlan_sock; +typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key); + /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; - struct rcu_head rcu; - struct socket *sock; - struct list_head handler_list; -}; - -struct vxlan_handler; -typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, __be32 key); - -struct vxlan_handler { - vxlan_rcv_t *rcv; - struct list_head node; - void *data; - struct vxlan_sock *vs; - atomic_t refcnt; - struct rcu_head rcu; + vxlan_rcv_t *rcv; + void *data; struct work_struct del_work; - int priority; + struct socket *sock; + struct rcu_head rcu; }; -void vxlan_handler_put(struct vxlan_handler *vh); +struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, + vxlan_rcv_t *rcv, void *data, + bool no_share); -struct vxlan_handler *vxlan_handler_add(struct net *net, - __be16 portno, vxlan_rcv_t *rcv, - void *data, int priority, bool create); +void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, +int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs, 
struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni); diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index f3df4e3f4..9d8991d6a 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -59,8 +59,6 @@ #define PORT_HASH_BITS 8 #define PORT_HASH_SIZE (1<handler_list, node) { - if (vh->rcv(vh, skb, vxh->vx_vni) == PACKET_RCVD) - return 0; - } + vs->rcv(vs, skb, vxh->vx_vni); + return 0; drop: /* Consume bad packet */ @@ -211,7 +210,7 @@ static int handle_offloads(struct sk_buff *skb) return 0; } -int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, +int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni) @@ -249,7 +248,7 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, uh->len = htons(skb->len); uh->check = 0; - vxlan_set_owner(vh->vs->sock->sk, skb); + vxlan_set_owner(vs->sock->sk, skb); err = handle_offloads(skb); if (err) @@ -259,8 +258,26 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, IPPROTO_UDP, tos, ttl, df); } -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) +static void rcu_free_vs(struct rcu_head *rcu) +{ + struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu); + + kfree(vs); +} + +static void vxlan_del_work(struct work_struct *work) +{ + struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); + + sk_release_kernel(vs->sock->sk); + call_rcu(&vs->rcu, rcu_free_vs); + vxlan_cleanup_module(); +} + +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, + vxlan_rcv_t *rcv, void *data) { + struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; struct sock *sk; struct sockaddr_in vxlan_addr = { @@ -271,8 +288,12 @@ static struct vxlan_sock 
*vxlan_socket_create(struct net *net, __be16 port) int rc; vs = kmalloc(sizeof(*vs), GFP_KERNEL); - if (!vs) + if (!vs) { + pr_debug("memory alocation failure\n"); return ERR_PTR(-ENOMEM); + } + + INIT_WORK(&vs->del_work, vxlan_del_work); /* Create UDP socket for encapsulation receive. */ rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vs->sock); @@ -287,81 +308,36 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) sk_change_net(sk, net); rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, - sizeof(vxlan_addr)); + sizeof(vxlan_addr)); if (rc < 0) { pr_debug("bind for UDP socket %pI4:%u (%d)\n", - &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); + &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); sk_release_kernel(sk); kfree(vs); return ERR_PTR(rc); } + vs->rcv = rcv; + vs->data = data; /* Disable multicast loopback */ inet_sk(sk)->mc_loop = 0; - INIT_LIST_HEAD(&vs->handler_list); + spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); + spin_unlock(&vn->sock_lock); /* Mark socket as an encapsulation socket. 
*/ udp_sk(sk)->encap_type = 1; udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; udp_encap_enable(); - return vs; } -static void rcu_free_vs_callback(struct rcu_head *rcu) -{ - struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu); - - kfree(vs); -} - -static void vxlan_socket_del(struct vxlan_sock *vs) -{ - if (list_empty(&vs->handler_list)) { - hlist_del_rcu(&vs->hlist); - - sk_release_kernel(vs->sock->sk); - call_rcu(&vs->rcu, rcu_free_vs_callback); - } -} - -static int vxlan_init_module(void); -static void vxlan_cleanup_module(void); - -static void rcu_free_vh_callback(struct rcu_head *rcu) -{ - struct vxlan_handler *vh = container_of(rcu, struct vxlan_handler, rcu); - - kfree(vh); -} - -static void vh_del_work(struct work_struct *work) -{ - struct vxlan_handler *vh = container_of(work, struct vxlan_handler, del_work); - struct vxlan_sock *vs = vh->vs; - struct net *net = sock_net(vs->sock->sk); - struct vxlan_net *vn = net_generic(net, vxlan_net_id); - - mutex_lock(&vn->sock_lock); - - list_del_rcu(&vh->node); - call_rcu(&vh->rcu, rcu_free_vh_callback); - vxlan_socket_del(vs); - - mutex_unlock(&vn->sock_lock); - - vxlan_cleanup_module(); -} - -struct vxlan_handler *vxlan_handler_add(struct net *net, - __be16 portno, vxlan_rcv_t *rcv, - void *data, int priority, bool create) +struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, + vxlan_rcv_t *rcv, void *data, + bool no_share) { struct vxlan_net *vn; struct vxlan_sock *vs; - struct vxlan_handler *vh; - struct vxlan_handler *new; int err; err = vxlan_init_module(); @@ -369,64 +345,19 @@ struct vxlan_handler *vxlan_handler_add(struct net *net, return ERR_PTR(err); vn = net_generic(net, vxlan_net_id); - mutex_lock(&vn->sock_lock); - /* Look to see if can reuse socket */ - vs = vxlan_find_port(net, portno); - if (!vs) { - vs = vxlan_socket_create(net, portno); - if (IS_ERR(vs)) { - new = (void *) vs; - goto out; - } - } - - /* Try existing vxlan hanlders for this socket. 
*/ - list_for_each_entry(vh, &vs->handler_list, node) { - if (vh->rcv == rcv) { - if (create) { - vxlan_socket_del(vs); - new = ERR_PTR(-EEXIST); - goto out; - } - atomic_inc(&vh->refcnt); - new = vh; - goto out; - } - } - - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (!new) { - vxlan_socket_del(vs); - new = ERR_PTR(-ENOMEM); - goto out; - } - - new->rcv = rcv; - new->vs = vs; - atomic_set(&new->refcnt, 1); - INIT_WORK(&new->del_work, vh_del_work); - new->data = data; - new->priority = priority; - - list_for_each_entry(vh, &vs->handler_list, node) { - if (vh->priority > priority) { - list_add_tail_rcu(&new->node, &vh->node); - goto out; - } - } - - list_add_tail_rcu(&new->node, &vs->handler_list); -out: - mutex_unlock(&vn->sock_lock); - return new; + vs = vxlan_socket_create(net, port, rcv, data); + return vs; } -void vxlan_handler_put(struct vxlan_handler *vh) +void vxlan_sock_release(struct vxlan_sock *vs) { - BUG_ON(!vh->vs); + struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id); + + spin_lock(&vn->sock_lock); + hlist_del_rcu(&vs->hlist); + spin_unlock(&vn->sock_lock); - if (atomic_dec_and_test(&vh->refcnt)) - queue_work(&vh->del_work); + queue_work(&vs->del_work); } static int vxlan_init_net(struct net *net) @@ -434,7 +365,7 @@ static int vxlan_init_net(struct net *net) struct vxlan_net *vn = net_generic(net, vxlan_net_id); unsigned int h; - mutex_init(&vn->sock_lock); + spin_lock_init(&vn->sock_lock); for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vn->sock_list[h]); diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index f3ef94707..c614eff39 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2011 Nicira, Inc. - * Copyright (c) 2012 Cisco Systems, Inc. + * Copyright (c) 2013 Nicira, Inc. + * Copyright (c) 2013 Cisco Systems, Inc. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -44,15 +44,13 @@ #include "datapath.h" #include "vport.h" -#define OVS_VXLAN_RCV_PRIORITY 8 - /** * struct vxlan_port - Keeps track of open UDP ports - * @vh: vxlan_handler created for the port. + * @vs: vxlan_sock created for the port. * @name: vport name. */ struct vxlan_port { - struct vxlan_handler *vh; + struct vxlan_sock *vs; char name[IFNAMSIZ]; }; @@ -62,11 +60,11 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. */ -static int vxlan_rcv(struct vxlan_handler *vh, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) { - struct vport *vport = vh->data; - struct iphdr *iph; struct ovs_key_ipv4_tunnel tun_key; + struct vport *vport = vs->data; + struct iphdr *iph; __be64 key; /* Save outer tunnel values */ @@ -75,13 +73,12 @@ static int vxlan_rcv(struct vxlan_handler *vh, struct sk_buff *skb, __be32 vx_vn ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); ovs_vport_receive(vport, skb, &tun_key); - return PACKET_RCVD; } static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) { struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sport(vxlan_port->vh->vs->sock->sk); + __be16 dst_port = inet_sport(vxlan_port->vs->sock->sk); if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; @@ -92,7 +89,7 @@ static void vxlan_tnl_destroy(struct vport *vport) { struct vxlan_port *vxlan_port = vxlan_vport(vport); - vxlan_handler_put(vxlan_port->vh); + vxlan_sock_release(vxlan_port->vs); ovs_vport_deferred_free(vport); } @@ -102,7 +99,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options = parms->options; struct vxlan_port *vxlan_port; - struct 
vxlan_handler *vh; + struct vxlan_sock *vs; struct vport *vport; struct nlattr *a; u16 dst_port; @@ -129,13 +126,12 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vh = vxlan_handler_add(net, htons(dst_port), vxlan_rcv, - vport, OVS_VXLAN_RCV_PRIORITY, true); - if (IS_ERR(vh)) { + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true); + if (IS_ERR(vs)) { ovs_vport_free(vport); - return (void *)vh; + return (void *)vs; } - vxlan_port->vh = vh; + vxlan_port->vs = vs; return vport; @@ -146,7 +142,7 @@ error: static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sport(vxlan_port->vh->vs->sock->sk); + __be16 dst_port = inet_sport(vxlan_port->vs->sock->sk); struct net *net = ovs_dp_get_net(vport->dp); struct rtable *rt; __be16 src_port; @@ -184,7 +180,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) inet_get_local_port_range(&port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); - err = vxlan_xmit_skb(net, vxlan_port->vh, rt, skb, + err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb, saddr, OVS_CB(skb)->tun_key->ipv4_dst, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, -- 2.43.0