2 * Copyright (c) 2011 Nicira, Inc.
3 * Copyright (c) 2012 Cisco Systems, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/version.h>
23 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
27 #include <linux/list.h>
28 #include <linux/net.h>
29 #include <linux/udp.h>
39 #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
42 * struct vxlanhdr - VXLAN header
43 * @vx_flags: Must have the exact value %VXLAN_FLAGS.
44 * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
/* Combined size of the UDP header and the VXLAN header. */
51 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
/*
 * vxlan_hdr_len - header-length callback; installed as the .hdr_len member
 * of ovs_vxlan_tnl_ops.
 *
 * NOTE(review): the function body is not visible in this view; presumably
 * it returns VXLAN_HLEN - confirm.
 */
53 static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
54 const struct ovs_key_ipv4_tunnel *tun_key)
60 * struct vxlan_port - Keeps track of open UDP ports
61 * @list: list element.
62 * @port: The UDP port number in network byte order.
63 * @vxlan_rcv_socket: The socket created for this port number.
64 * @count: How many ports are using this socket/port.
/* Linkage into the file-scope vxlan_ports list (see vxlan_tunnel_setup()). */
67 struct list_head list;
/* UDP socket for this port; created and bound in vxlan_socket_init(). */
69 struct socket *vxlan_rcv_socket;
/*
 * Every open VXLAN UDP port is linked here.  Entries for different network
 * namespaces share the one list; lookups filter by namespace.
 */
73 static LIST_HEAD(vxlan_ports);
/*
 * vxlan_port_exists - look for an already-open VXLAN port.
 * @net:  network namespace the socket must belong to.
 * @port: UDP port; compared directly against vxlan_port->port, which the
 *        struct documentation says is in network byte order.
 *
 * Walks vxlan_ports and tests each entry for a matching port number and a
 * socket whose namespace equals @net.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the matching entry is returned, or NULL when none matches -
 * confirm.
 */
75 static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port)
77 struct vxlan_port *vxlan_port;
79 list_for_each_entry(vxlan_port, &vxlan_ports, list) {
/* Both the port number and the owning namespace have to match. */
80 if (vxlan_port->port == port &&
81 net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
/*
 * vxlan_hdr - locate the VXLAN header of @skb.
 *
 * The VXLAN header is taken to start immediately after the UDP header, so
 * the result is only meaningful once the skb's transport header points at
 * the outer UDP header.
 */
88 static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
/* udp_hdr(skb) + 1 steps over exactly one struct udphdr. */
90 return (struct vxlanhdr *)(udp_hdr(skb) + 1);
/*
 * vxlan_build_header - fill in the outer UDP and VXLAN headers of a packet
 * about to be transmitted; installed as the .build_header member of
 * ovs_vxlan_tnl_ops.
 *
 * NOTE(review): part of the parameter list, some local declarations and the
 * return statement are not visible in this view.
 */
93 static struct sk_buff *vxlan_build_header(const struct vport *vport,
94 const struct tnl_mutable_config *mutable,
95 struct dst_entry *dst,
/* The UDP header is at the transport header; the VXLAN header follows it. */
99 struct udphdr *udph = udp_hdr(skb);
100 struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
101 const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
/* Obtain the tunnel flags and the outgoing key for this packet. */
105 tnl_get_param(mutable, tun_key, &flags, &out_key);
/* Destination port comes from the tunnel configuration. */
107 udph->dest = mutable->dst_port;
/* Source port is supplied by ovs_tnl_get_src_port() in host byte order. */
108 udph->source = htons(ovs_tnl_get_src_port(skb));
/* UDP length: everything from the transport header to the end of the skb. */
110 udph->len = htons(skb->len - skb_transport_offset(skb));
112 vxh->vx_flags = htonl(VXLAN_FLAGS);
/* The VNI lives in the top 24 bits of vx_vni, hence the shift by 8. */
113 vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
116 * Allow our local IP stack to fragment the outer packet even if the
117 * DF bit is set as a last resort. We also need to force selection of
118 * an IP ID here because Linux will otherwise leave it at 0 if the
119 * packet originally had DF set.
122 __ip_select_ident(ip_hdr(skb), dst, 0);
127 /* Called with rcu_read_lock and BH disabled. */
/*
 * vxlan_rcv - receive hook for VXLAN UDP sockets; vxlan_socket_init()
 * installs it as the socket's encap_rcv callback.
 *
 * Checks the VXLAN header, pulls the UDP and VXLAN headers off the skb,
 * looks up the tunnel vport for the outer addresses and VNI, and passes the
 * skb to ovs_tnl_rcv().
 *
 * NOTE(review): the error paths and return statements are not visible in
 * this view.
 */
128 static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
131 struct vxlanhdr *vxh;
132 const struct tnl_mutable_config *mutable;
134 struct ovs_key_ipv4_tunnel tun_key;
136 u32 tunnel_flags = 0;
/*
 * The UDP and VXLAN headers plus ETH_HLEN further bytes must be available
 * before any of them is read.
 */
138 if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
141 vxh = vxlan_hdr(skb);
/*
 * The flags word must be exactly VXLAN_FLAGS and the low 8 bits of vx_vni
 * must be zero.
 */
142 if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
143 vxh->vx_vni & htonl(0xff)))
/* Strip the UDP and VXLAN headers. */
146 __skb_pull(skb, VXLAN_HLEN);
/*
 * NOTE(review): the checksum adjustment covers VXLAN_HLEN + ETH_HLEN although
 * only VXLAN_HLEN was pulled above; presumably this accounts for the
 * Ethernet header being consumed later - confirm.
 */
147 skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
/* The VNI is the top 24 bits of vx_vni; shift down and store as a be64 key. */
149 key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
/* Look up the vport by namespace, outer addresses, key and tunnel type. */
152 vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
153 key, TNL_T_PROTO_VXLAN, &mutable);
154 if (unlikely(!vport))
/* Flag the key when the port matches on it or has no configured daddr. */
157 if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
158 tunnel_flags = OVS_TNL_F_KEY;
162 /* Save outer tunnel values */
163 tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
/*
 * tun_key is a local of this function, so the pointer stored in the skb's
 * control block is only valid until this function returns.
 */
164 OVS_CB(skb)->tun_key = &tun_key;
166 ovs_tnl_rcv(vport, skb);
175 /* Random value. Irrelevant as long as it's not 0 since we set the handler. */
176 #define UDP_ENCAP_VXLAN 1
/*
 * vxlan_socket_init - create the kernel UDP socket for @vxlan_port.
 * @vxlan_port: entry whose ->port is already set; ->vxlan_rcv_socket is
 *              filled in here.
 * @net:        network namespace the socket is moved into.
 *
 * Creates an AF_INET datagram socket, binds it to INADDR_ANY on the entry's
 * port and installs vxlan_rcv() as its UDP encapsulation receive handler.
 *
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this view; the trailing sk_release_kernel()/pr_warn() lines
 * look like the failure path - confirm.
 */
177 static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
180 struct sockaddr_in sin;
182 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
183 &vxlan_port->vxlan_rcv_socket);
187 /* release net ref. */
188 sk_change_net(vxlan_port->vxlan_rcv_socket->sk, net);
/* Listen on every local address, on the port stored in the entry. */
190 sin.sin_family = AF_INET;
191 sin.sin_addr.s_addr = htonl(INADDR_ANY);
/* ->port is assigned via htons() in vxlan_tunnel_setup(), so no swap here. */
192 sin.sin_port = vxlan_port->port;
194 err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
195 sizeof(struct sockaddr_in));
/* Mark the socket as encapsulating and route its datagrams to vxlan_rcv(). */
199 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
200 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
207 sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
209 pr_warn("cannot register vxlan protocol handler\n");
/*
 * vxlan_tunnel_release - drop a user of @vxlan_port and tear the entry down
 * once its count reaches zero.
 *
 * @vxlan_port is dereferenced in the visible code, so callers must not pass
 * NULL unless a guard exists in lines not shown here.
 *
 * NOTE(review): the statement that adjusts ->count and any freeing of the
 * entry are not visible in this view.
 */
213 static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
217 if (vxlan_port->count == 0) {
218 /* Release old socket */
219 sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
/* Unlink from vxlan_ports so later lookups no longer find this entry. */
220 list_del(&vxlan_port->list);
/*
 * vxlan_tunnel_setup - obtain the vxlan_port entry for the destination port
 * requested in @options, creating it if necessary.
 * @net:     network namespace to look in / create the socket in.
 * @options: nested netlink attributes; OVS_TUNNEL_ATTR_DST_PORT is read.
 * @vxport:  out parameter receiving the existing or newly created entry.
 *
 * NOTE(review): the branch structure, error labels and return statements are
 * not visible in this view; the description follows the visible statements
 * and their comments.
 */
224 static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
225 struct vxlan_port **vxport)
230 struct vxlan_port *vxlan_port = NULL;
/* The attribute must be present and exactly u16-sized to be accepted. */
239 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
240 if (a && nla_len(a) == sizeof(u16)) {
/* Kept as read here; converted with htons() where it is used below. */
241 dst_port = nla_get_u16(a);
243 /* Require destination port from userspace. */
248 /* Verify if we already have a socket created for this port */
249 vxlan_port = vxlan_port_exists(net, htons(dst_port));
253 *vxport = vxlan_port;
257 /* Add a new socket for this port */
258 vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL);
264 vxlan_port->port = htons(dst_port);
/* A freshly created entry starts with a single user. */
265 vxlan_port->count = 1;
/* The entry is linked before the socket exists; see the list_del() below. */
266 list_add_tail(&vxlan_port->list, &vxlan_ports);
268 err = vxlan_socket_init(vxlan_port, net);
272 *vxport = vxlan_port;
/* Presumably the failure path: undo the list_add_tail() above - confirm. */
276 list_del(&vxlan_port->list);
/*
 * vxlan_set_options - .set_options handler for VXLAN vports.
 *
 * Looks up the entry for the currently configured destination port, sets up
 * the entry for the port named in @options, applies the generic tunnel
 * options, and releases the old entry.
 *
 * NOTE(review): error checks and return statements are not visible in this
 * view.  vxlan_port_exists() may fail to find the old port, and the visible
 * part of vxlan_tunnel_release() dereferences its argument - confirm that
 * old_port cannot be NULL here.
 */
282 static int vxlan_set_options(struct vport *vport, struct nlattr *options)
285 struct net *net = ovs_dp_get_net(vport->dp);
286 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
287 struct tnl_mutable_config *config;
288 struct vxlan_port *old_port = NULL;
289 struct vxlan_port *vxlan_port = NULL;
291 config = rtnl_dereference(tnl_vport->mutable);
/* Find the old entry first, while config still holds the current dst_port. */
293 old_port = vxlan_port_exists(net, config->dst_port);
295 err = vxlan_tunnel_setup(net, options, &vxlan_port);
299 err = ovs_tnl_set_options(vport, options);
/* Presumably reached only when ovs_tnl_set_options() failed - confirm. */
302 vxlan_tunnel_release(vxlan_port);
304 /* Release old socket */
305 vxlan_tunnel_release(old_port);
/*
 * Tunnel operations for VXLAN, passed to ovs_tnl_create() by
 * vxlan_tnl_create(): VXLAN tunnel type carried over UDP, with the header
 * length and header construction callbacks defined in this file.
 */
311 static const struct tnl_ops ovs_vxlan_tnl_ops = {
312 .tunnel_type = TNL_T_PROTO_VXLAN,
313 .ipproto = IPPROTO_UDP,
314 .hdr_len = vxlan_hdr_len,
315 .build_header = vxlan_build_header,
/*
 * vxlan_tnl_destroy - .destroy handler for VXLAN vports.
 *
 * Releases this vport's use of its vxlan_port entry, then performs the
 * generic tunnel teardown.
 *
 * NOTE(review): the result of vxlan_port_exists() is passed on without a
 * visible NULL check, and the visible part of vxlan_tunnel_release()
 * dereferences its argument - confirm the lookup cannot fail here.
 */
318 static void vxlan_tnl_destroy(struct vport *vport)
320 struct vxlan_port *vxlan_port;
321 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
322 struct tnl_mutable_config *config;
324 config = rtnl_dereference(tnl_vport->mutable);
326 vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
329 vxlan_tunnel_release(vxlan_port);
331 ovs_tnl_destroy(vport);
/*
 * vxlan_tnl_create - .create handler for VXLAN vports.
 *
 * Sets up the vxlan_port entry for the requested options, then creates the
 * generic tunnel vport with the VXLAN vport and tunnel operations.
 *
 * NOTE(review): error checks and return statements are not visible in this
 * view; the trailing vxlan_tunnel_release() presumably undoes the setup when
 * ovs_tnl_create() fails - confirm.
 */
334 static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
338 struct vxlan_port *vxlan_port = NULL;
340 err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options,
345 vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
348 vxlan_tunnel_release(vxlan_port);
/*
 * vport operations for OVS_VPORT_TYPE_VXLAN.  create, destroy and
 * set_options are the VXLAN-specific functions defined in this file; the
 * remaining callbacks are the generic ovs_tnl_* implementations.
 */
353 const struct vport_ops ovs_vxlan_vport_ops = {
354 .type = OVS_VPORT_TYPE_VXLAN,
355 .flags = VPORT_F_TUN_ID,
356 .create = vxlan_tnl_create,
357 .destroy = vxlan_tnl_destroy,
358 .get_name = ovs_tnl_get_name,
359 .get_options = ovs_tnl_get_options,
360 .set_options = vxlan_set_options,
361 .send = ovs_tnl_send,
364 #warning VXLAN tunneling will not be available on kernels before 2.6.26
365 #endif /* Linux kernel < 2.6.26 */