/*
- * Copyright (c) 2010 Nicira Networks.
- * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007-2012 Nicira, Inc.
*
- * Significant portions of this file may be copied from parts of the Linux
- * kernel, by Linus Torvalds and others.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
*/
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
+#include <linux/kconfig.h>
+#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/in_route.h>
+#include <linux/inetdevice.h>
#include <linux/jhash.h>
+#include <linux/list.h>
#include <linux/kernel.h>
-#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/route.h>
+#include <net/xfrm.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
#include <net/icmp.h>
-#include <net/inet_ecn.h>
#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-#include <net/ipv6.h>
-#endif
+#include <net/ip_tunnels.h>
+#include <net/gre.h>
#include <net/protocol.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-#include "actions.h"
#include "datapath.h"
-#include "openvswitch/gre.h"
-#include "table.h"
#include "vport.h"
-#include "vport-generic.h"
-
-/* The absolute minimum fragment size. Note that there are many other
- * definitions of the minimum MTU. */
-#define IP_MIN_MTU 68
-
-/* The GRE header is composed of a series of sections: a base and then a variable
- * number of options. */
-#define GRE_HEADER_SECTION 4
-
-struct mutable_config {
- struct rcu_head rcu;
-
- unsigned char eth_addr[ETH_ALEN];
- unsigned int mtu;
- struct gre_port_config port_config;
-
- int tunnel_hlen; /* Tunnel header length. */
-};
-
-struct gre_vport {
- struct rcu_head rcu;
- struct tbl_node tbl_node;
-
- char name[IFNAMSIZ];
-
- /* Protected by RCU. */
- struct mutable_config *mutable;
-};
-
-/* Protected by RCU. */
-static struct tbl *port_table;
-
-/* These are just used as an optimization: they don't require any kind of
- * synchronization because we could have just as easily read the value before
- * the port change happened. */
-static unsigned int key_local_remote_ports;
-static unsigned int key_remote_ports;
-static unsigned int local_remote_ports;
-static unsigned int remote_ports;
-
-static inline struct gre_vport *gre_vport_priv(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-static inline struct vport *gre_vport_to_vport(const struct gre_vport *gre_vport)
-{
- return vport_from_priv(gre_vport);
-}
-
-static inline struct gre_vport *gre_vport_table_cast(const struct tbl_node *node)
-{
- return container_of(node, struct gre_vport, tbl_node);
-}
-
-/* RCU callback. */
-static void free_config(struct rcu_head *rcu)
-{
- struct mutable_config *c = container_of(rcu, struct mutable_config, rcu);
- kfree(c);
-}
-
-static void assign_config_rcu(struct vport *vport,
- struct mutable_config *new_config)
-{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- struct mutable_config *old_config;
-
- old_config = rcu_dereference(gre_vport->mutable);
- rcu_assign_pointer(gre_vport->mutable, new_config);
- call_rcu(&old_config->rcu, free_config);
-}
-
-static unsigned int *find_port_pool(const struct mutable_config *mutable)
-{
- if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) {
- if (mutable->port_config.saddr)
- return &local_remote_ports;
- else
- return &remote_ports;
- } else {
- if (mutable->port_config.saddr)
- return &key_local_remote_ports;
- else
- return &key_remote_ports;
- }
-}
-
-enum lookup_key {
- LOOKUP_SADDR = 0,
- LOOKUP_DADDR = 1,
- LOOKUP_KEY = 2,
- LOOKUP_KEY_MATCH = 3
-};
-
-struct port_lookup_key {
- u32 vals[4]; /* Contains enum lookup_key keys. */
- const struct mutable_config *mutable;
-};
-
-/* Modifies 'target' to store the rcu_dereferenced pointer that was used to do
- * the comparision. */
-static int port_cmp(const struct tbl_node *node, void *target)
-{
- const struct gre_vport *gre_vport = gre_vport_table_cast(node);
- struct port_lookup_key *lookup = target;
- lookup->mutable = rcu_dereference(gre_vport->mutable);
-
- return ((lookup->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) ==
- lookup->vals[LOOKUP_KEY_MATCH]) &&
- lookup->mutable->port_config.daddr == lookup->vals[LOOKUP_DADDR] &&
- lookup->mutable->port_config.in_key == lookup->vals[LOOKUP_KEY] &&
- lookup->mutable->port_config.saddr == lookup->vals[LOOKUP_SADDR];
-}
-
-static u32 port_hash(struct port_lookup_key *lookup)
+/* Extracts the low-order 32 bits of the big-endian 64-bit value 'x' and
+ * returns them, still in big-endian form. */
+static __be32 be64_get_low32(__be64 x)
{
- return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
+ const u64 raw = (__force u64)x;
+
+#ifdef __BIG_ENDIAN
+ /* Host order equals wire order: truncation keeps the low word. */
+ return (__force __be32)raw;
+#else
+ /* Byte-swapped host view: the wire-order low word is the top half. */
+ return (__force __be32)(raw >> 32);
+#endif
}
-static int add_port(struct vport *vport)
+/* Masks 'flags' down to the TUNNEL_CSUM and TUNNEL_KEY bits; every other
+ * bit is cleared in the returned value. */
+static __be16 filter_tnl_flags(__be16 flags)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- struct port_lookup_key lookup;
- int err;
-
- if (!port_table) {
- struct tbl *new_table;
-
- new_table = tbl_create(0);
- if (!new_table)
- return -ENOMEM;
-
- rcu_assign_pointer(port_table, new_table);
-
- } else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
- struct tbl *old_table = port_table;
- struct tbl *new_table;
-
- new_table = tbl_expand(old_table);
- if (IS_ERR(new_table))
- return PTR_ERR(new_table);
-
- rcu_assign_pointer(port_table, new_table);
- tbl_deferred_destroy(old_table, NULL);
- }
-
- lookup.vals[LOOKUP_SADDR] = gre_vport->mutable->port_config.saddr;
- lookup.vals[LOOKUP_DADDR] = gre_vport->mutable->port_config.daddr;
- lookup.vals[LOOKUP_KEY] = gre_vport->mutable->port_config.in_key;
- lookup.vals[LOOKUP_KEY_MATCH] = gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH;
-
- err = tbl_insert(port_table, &gre_vport->tbl_node, port_hash(&lookup));
- if (err)
- return err;
-
- (*find_port_pool(gre_vport->mutable))++;
-
- return 0;
+ const __be16 kept_bits = TUNNEL_CSUM | TUNNEL_KEY;
+
+ return flags & kept_bits;
}
-static int del_port(struct vport *vport)
+/* Prepares 'skb' for GRE transmission and pushes the GRE header onto it.
+ *
+ * The header is described by the tunnel key attached to the packet
+ * (OVS_CB(skb)->tun_key): its checksum/key flags (plus 'gre64_flag'), the
+ * low 32 bits of its tunnel id as the GRE key, 'seq' as the sequence field
+ * and ETH_P_TEB as the protocol. 'tunnel_hlen' is passed through to
+ * gre_build_header().
+ *
+ * Returns the skb to transmit, or NULL if gre_handle_offloads() reported an
+ * error. */
+static struct sk_buff *__build_header(struct sk_buff *skb,
+ int tunnel_hlen,
+ __be32 seq, __be16 gre64_flag)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- int err;
-
- err = tbl_remove(port_table, &gre_vport->tbl_node);
- if (err)
- return err;
-
- (*find_port_pool(gre_vport->mutable))--;
-
- return 0;
-}
-
-#define FIND_PORT_KEY (1 << 0)
-#define FIND_PORT_MATCH (1 << 1)
-#define FIND_PORT_ANY (FIND_PORT_KEY | FIND_PORT_MATCH)
+ const struct ovs_key_ipv4_tunnel *key = OVS_CB(skb)->tun_key;
+ const bool want_csum = !!(key->tun_flags & TUNNEL_CSUM);
+ struct tnl_ptk_info hdr;
-static struct vport *find_port(__be32 saddr, __be32 daddr, __be32 key,
- int port_type,
- const struct mutable_config **mutable)
-{
- struct port_lookup_key lookup;
- struct tbl *table = rcu_dereference(port_table);
- struct tbl_node *tbl_node;
-
- if (!table)
+ /* The returned skb replaces ours; an ERR_PTR is reported as NULL. */
+ skb = gre_handle_offloads(skb, want_csum);
+ if (IS_ERR(skb))
return NULL;
- lookup.vals[LOOKUP_SADDR] = saddr;
- lookup.vals[LOOKUP_DADDR] = daddr;
-
- if (port_type & FIND_PORT_KEY) {
- lookup.vals[LOOKUP_KEY] = key;
- lookup.vals[LOOKUP_KEY_MATCH] = 0;
-
- if (key_local_remote_ports) {
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
- if (tbl_node)
- goto found;
- }
-
- if (key_remote_ports) {
- lookup.vals[LOOKUP_SADDR] = 0;
+ hdr.seq = seq;
+ hdr.key = be64_get_low32(key->tun_id);
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
- if (tbl_node)
- goto found;
-
- lookup.vals[LOOKUP_SADDR] = saddr;
- }
- }
-
- if (port_type & FIND_PORT_MATCH) {
- lookup.vals[LOOKUP_KEY] = 0;
- lookup.vals[LOOKUP_KEY_MATCH] = GRE_F_IN_KEY_MATCH;
-
- if (local_remote_ports) {
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
- if (tbl_node)
- goto found;
- }
+ hdr.proto = htons(ETH_P_TEB);
+ hdr.flags = filter_tnl_flags(key->tun_flags) | gre64_flag;
+ gre_build_header(skb, &hdr, tunnel_hlen);
- if (remote_ports) {
- lookup.vals[LOOKUP_SADDR] = 0;
-
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
- if (tbl_node)
- goto found;
- }
- }
-
- return NULL;
-
-found:
- *mutable = lookup.mutable;
- return gre_vport_to_vport(gre_vport_table_cast(tbl_node));
+ return skb;
}
-static bool check_ipv4_address(__be32 addr)
+/* Combines a GRE key and sequence number into one big-endian 64-bit tunnel
+ * id: 'seq' becomes the most-significant 32 bits and 'key' the
+ * least-significant 32 bits (the half be64_get_low32() reads back). */
+static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
{
- if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
- || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
- return false;
-
- return true;
+ u64 upper;
+ u32 lower;
+
+#ifdef __BIG_ENDIAN
+ upper = (__force u64)seq;
+ lower = (__force u32)key;
+#else
+ upper = (__force u64)key;
+ lower = (__force u32)seq;
+#endif
+ return (__force __be64)(upper << 32 | lower);
}
-static bool ipv4_should_icmp(struct sk_buff *skb)
+/* Receive hook for GRE packets (installed through gre_protocol).
+ *
+ * Packets whose flags carry both TUNNEL_KEY and TUNNEL_SEQ go to the
+ * namespace's gre64 vport, all others to its gre vport. If that vport does
+ * not exist the packet is declined with PACKET_REJECT. Otherwise a tunnel
+ * key is built from the outer IP header, the 64-bit id formed from the GRE
+ * key and sequence fields, and the filtered flags; the packet is handed to
+ * ovs_vport_receive() and PACKET_RCVD is returned.
+ *
+ * Called with rcu_read_lock and BH disabled. */
+static int gre_rcv(struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi)
{
- struct iphdr *old_iph = ip_hdr(skb);
-
- /* Don't respond to L2 broadcast. */
- if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
- return false;
-
- /* Don't respond to L3 broadcast or invalid addresses. */
- if (!check_ipv4_address(old_iph->daddr) ||
- !check_ipv4_address(old_iph->saddr))
- return false;
-
- /* Only respond to the first fragment. */
- if (old_iph->frag_off & htons(IP_OFFSET))
- return false;
-
- /* Don't respond to ICMP error messages. */
- if (old_iph->protocol == IPPROTO_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
- (old_iph->ihl << 2) +
- offsetof(struct icmphdr, type) -
- skb->data, sizeof(icmp_type),
- &icmp_type);
-
- if (!icmp_typep)
- return false;
-
- if (*icmp_typep > NR_ICMP_TYPES
- || (*icmp_typep <= ICMP_PARAMETERPROB
- && *icmp_typep != ICMP_ECHOREPLY
- && *icmp_typep != ICMP_ECHO))
- return false;
- }
-
- return true;
-}
+ struct ovs_net *ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ const bool is_gre64 = (tpi->flags & TUNNEL_KEY) &&
+ (tpi->flags & TUNNEL_SEQ);
+ struct ovs_key_ipv4_tunnel tun_key;
+ struct vport *vport;
-static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
-{
- struct iphdr *iph, *old_iph = ip_hdr(skb);
- struct icmphdr *icmph;
- u8 *payload;
-
- iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
- icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
- payload = skb_put(nskb, payload_length);
-
- /* IP */
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
- IPTOS_PREC_INTERNETCONTROL;
- iph->tot_len = htons(sizeof(struct iphdr)
- + sizeof(struct icmphdr)
- + payload_length);
- get_random_bytes(&iph->id, sizeof(iph->id));
- iph->frag_off = 0;
- iph->ttl = IPDEFTTL;
- iph->protocol = IPPROTO_ICMP;
- iph->daddr = old_iph->saddr;
- iph->saddr = old_iph->daddr;
-
- ip_send_check(iph);
-
- /* ICMP */
- icmph->type = ICMP_DEST_UNREACH;
- icmph->code = ICMP_FRAG_NEEDED;
- icmph->un.gateway = htonl(mtu);
- icmph->checksum = 0;
-
- nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
- payload, payload_length,
- nskb->csum);
- icmph->checksum = csum_fold(nskb->csum);
-}
+ /* Pick the receiving port by tunnel flavour. */
+ if (is_gre64)
+ vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
+ else
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+ if (unlikely(!vport))
+ return PACKET_REJECT;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static bool ipv6_should_icmp(struct sk_buff *skb)
-{
- struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
- int addr_type;
- int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-
- /* Check source address is valid. */
- addr_type = ipv6_addr_type(&old_ipv6h->saddr);
- if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
- return false;
-
- /* Don't reply to unspecified addresses. */
- if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
- return false;
-
- /* Don't respond to ICMP error messages. */
- payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
- if (payload_off < 0)
- return false;
-
- if (nexthdr == NEXTHDR_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, payload_off +
- offsetof(struct icmp6hdr,
- icmp6_type),
- sizeof(icmp_type), &icmp_type);
-
- if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
- return false;
- }
+ ovs_flow_tun_key_init(&tun_key, ip_hdr(skb),
+ key_to_tunnel_id(tpi->key, tpi->seq),
+ filter_tnl_flags(tpi->flags));
- return true;
+ ovs_vport_receive(vport, skb, &tun_key);
+ return PACKET_RCVD;
}
-static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
+/* Error hook for GRE (installed through gre_protocol.err_handler).
+ *
+ * Selects the namespace's gre64 vport when tpi->flags has both TUNNEL_KEY
+ * and TUNNEL_SEQ set, otherwise its gre vport. Returns PACKET_REJECT when
+ * the selected vport does not exist and PACKET_RCVD when it does; nothing
+ * else is done with the packet and 'info' is not used.
+ *
+ * Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
{
- struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
- struct icmp6hdr *icmp6h;
- u8 *payload;
-
- ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
- icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
- payload = skb_put(nskb, payload_length);
-
- /* IPv6 */
- ipv6h->version = 6;
- ipv6h->priority = 0;
- memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
- ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
- + payload_length);
- ipv6h->nexthdr = NEXTHDR_ICMP;
- ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
- ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
- ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);
-
- /* ICMPv6 */
- icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
- icmp6h->icmp6_code = 0;
- icmp6h->icmp6_cksum = 0;
- icmp6h->icmp6_mtu = htonl(mtu);
-
- nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
- payload, payload_length,
- nskb->csum);
- icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- sizeof(struct icmp6hdr)
- + payload_length,
- ipv6h->nexthdr, nskb->csum);
-}
-#endif /* IPv6 */
+ struct ovs_net *ovs_net;
+ struct vport *vport;
-static bool send_frag_needed(struct vport *vport,
- const struct mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu,
- __be32 flow_key)
-{
- unsigned int eth_hdr_len = ETH_HLEN;
- unsigned int total_length = 0, header_length = 0, payload_length;
- struct ethhdr *eh, *old_eh = eth_hdr(skb);
- struct sk_buff *nskb;
-
- /* Sanity check */
- if (skb->protocol == htons(ETH_P_IP)) {
- if (mtu < IP_MIN_MTU)
- return false;
-
- if (!ipv4_should_icmp(skb))
- return true;
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (mtu < IPV6_MIN_MTU)
- return false;
-
- /* In theory we should do PMTUD on IPv6 multicast messages but
- * we don't have an address to send from so just fragment. */
- if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
- return false;
-
- if (!ipv6_should_icmp(skb))
- return true;
- }
-#endif
+ ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ if ((tpi->flags & TUNNEL_KEY) && (tpi->flags & TUNNEL_SEQ))
+ vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
else
- return false;
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
- /* Allocate */
- if (old_eh->h_proto == htons(ETH_P_8021Q))
- eth_hdr_len = VLAN_ETH_HLEN;
-
- payload_length = skb->len - eth_hdr_len;
- if (skb->protocol == htons(ETH_P_IP)) {
- header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, 576);
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else {
- header_length = sizeof(struct ipv6hdr) +
- sizeof(struct icmp6hdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, IPV6_MIN_MTU);
- }
-#endif
-
- total_length = min(total_length, mutable->mtu);
- payload_length = total_length - header_length;
-
- nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
- payload_length);
- if (!nskb)
- return false;
-
- skb_reserve(nskb, NET_IP_ALIGN);
-
- /* Ethernet / VLAN */
- eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
- memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
- memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
- nskb->protocol = eh->h_proto = old_eh->h_proto;
- if (old_eh->h_proto == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;
-
- vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
- vh->h_vlan_encapsulated_proto = skb->protocol;
- }
- skb_reset_mac_header(nskb);
-
- /* Protocol */
- if (skb->protocol == htons(ETH_P_IP))
- ipv4_build_icmp(skb, nskb, mtu, payload_length);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ if (unlikely(!vport))
+ return PACKET_REJECT;
else
- ipv6_build_icmp(skb, nskb, mtu, payload_length);
-#endif
-
- /* Assume that flow based keys are symmetric with respect to input
- * and output and use the key that we were going to put on the
- * outgoing packet for the fake received packet. If the keys are
- * not symmetric then PMTUD needs to be disabled since we won't have
- * any way of synthesizing packets. */
- if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH &&
- mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
- OVS_CB(nskb)->tun_id = flow_key;
-
- compute_ip_summed(nskb, false);
- vport_receive(vport, nskb);
-
- return true;
+ return PACKET_RCVD;
}
-static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
+static int __send(struct vport *vport, struct sk_buff *skb,
+ int tunnel_hlen,
+ __be32 seq, __be16 gre64_flag) /* Encapsulates 'skb' in GRE and sends it.
+ *
+ * Steps, in order: look up a route from the source/destination address and
+ * TOS in OVS_CB(skb)->tun_key plus skb->mark; make sure the skb has headroom
+ * for the link-layer, IP and tunnel headers (and a VLAN header when a tag is
+ * pending), expanding the head if it is too small or the header is cloned;
+ * push a pending VLAN tag with __vlan_put_tag(); build the GRE header with
+ * 'seq' and 'gre64_flag' through __build_header(); hand the packet to
+ * iptunnel_xmit().
+ *
+ * Returns the value of iptunnel_xmit(), 0 if __build_header() returns NULL,
+ * the PTR_ERR() of a failed route lookup, the error from pskb_expand_head(),
+ * or -ENOMEM if the VLAN push fails. Every failure after the route lookup
+ * releases the route with ip_rt_put().
+ *
+ * NOTE(review): who frees 'skb' on each failure path is not visible in this
+ * function (it depends on __vlan_put_tag(), gre_handle_offloads() and the
+ * caller) -- confirm before changing any of the error returns. */
{
- if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
- struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
- if (!nskb) {
- kfree_skb(skb);
- return ERR_PTR(-ENOMEM);
- }
-
- set_skb_csum_bits(skb, nskb);
-
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
-
- dev_kfree_skb(skb);
- return nskb;
- }
-
- return skb;
-}
+ struct rtable *rt;
+ int min_headroom;
+ __be16 df;
+ __be32 saddr;
+ int err;
-static void create_gre_header(struct sk_buff *skb,
- const struct mutable_config *mutable)
-{
- struct iphdr *iph = ip_hdr(skb);
- __be16 *flags = (__be16 *)(iph + 1);
- __be16 *protocol = flags + 1;
- __be32 *options = (__be32 *)((u8 *)iph + mutable->tunnel_hlen
- - GRE_HEADER_SECTION);
-
- *protocol = htons(ETH_P_TEB);
- *flags = 0;
-
- /* Work backwards over the options so the checksum is last. */
- if (mutable->port_config.out_key ||
- mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) {
- *flags |= GRE_KEY;
-
- if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
- *options = OVS_CB(skb)->tun_id;
- else
- *options = mutable->port_config.out_key;
-
- options--;
+ /* Route lookup */
+ saddr = OVS_CB(skb)->tun_key->ipv4_src;
+ rt = find_route(ovs_dp_get_net(vport->dp),
+ &saddr,
+ OVS_CB(skb)->tun_key->ipv4_dst,
+ IPPROTO_GRE,
+ OVS_CB(skb)->tun_key->ipv4_tos,
+ skb->mark);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto error;
}
- if (mutable->port_config.flags & GRE_F_OUT_CSUM) {
- *flags |= GRE_CSUM;
+ min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ + tunnel_hlen + sizeof(struct iphdr)
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
- *options = 0;
- *(__sum16 *)options = csum_fold(skb_checksum(skb,
- sizeof(struct iphdr),
- skb->len - sizeof(struct iphdr),
- 0));
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto err_free_rt;
}
-}
-static int check_checksum(struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- __be16 flags = *(__be16 *)(iph + 1);
- __sum16 csum = 0;
-
- if (flags & GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
-
- if (!csum)
- break;
- /* Fall through. */
-
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- break;
+ if (vlan_tx_tag_present(skb)) {
+ if (unlikely(!__vlan_put_tag(skb,
+ skb->vlan_proto,
+ vlan_tx_tag_get(skb)))) {
+ err = -ENOMEM;
+ goto err_free_rt;
}
+ vlan_set_tci(skb, 0);
}
- return (csum == 0);
-}
-
-static int parse_gre_header(struct iphdr *iph, __be16 *flags, __be32 *key)
-{
- /* IP and ICMP protocol handlers check that the IHL is valid. */
- __be16 *flagsp = (__be16 *)((u8 *)iph + (iph->ihl << 2));
- __be16 *protocol = flagsp + 1;
- __be32 *options = (__be32 *)(protocol + 1);
- int hdr_len;
-
- *flags = *flagsp;
-
- if (*flags & (GRE_VERSION | GRE_ROUTING))
- return -EINVAL;
-
- if (*protocol != htons(ETH_P_TEB))
- return -EINVAL;
-
- hdr_len = GRE_HEADER_SECTION;
-
- if (*flags & GRE_CSUM) {
- hdr_len += GRE_HEADER_SECTION;
- options++;
+ /* Push Tunnel header. */
+ skb = __build_header(skb, tunnel_hlen, seq, gre64_flag);
+ if (unlikely(!skb)) {
+ err = 0; /* a failed header build is reported as 0, not as an error code */
+ goto err_free_rt;
}
- if (*flags & GRE_KEY) {
- hdr_len += GRE_HEADER_SECTION;
-
- *key = *options;
- options++;
- } else
- *key = 0;
+ df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+ htons(IP_DF) : 0;
- if (*flags & GRE_SEQ)
- hdr_len += GRE_HEADER_SECTION;
+ skb->local_df = 1; /* NOTE(review): presumably lets the local IP stack fragment the outer packet even with DF set -- confirm */
- return hdr_len;
+ return iptunnel_xmit(rt, skb, saddr,
+ OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
+ OVS_CB(skb)->tun_key->ipv4_tos,
+ OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
+err_free_rt:
+ ip_rt_put(rt);
+error:
+ return err;
}
-static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb)
-{
- u8 inner;
-
- if (skb->protocol == htons(ETH_P_IP))
- inner = ((struct iphdr *)skb_network_header(skb))->tos;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
-#endif
- else
- inner = 0;
-
- return INET_ECN_encapsulate(tos, inner);
-}
+/* Hooks handed to gre_cisco_register() by gre_init(): gre_rcv() for received
+ * packets and gre_err() for errors. */
+static struct gre_cisco_protocol gre_protocol = {
+ .priority = 1,
+ .handler = gre_rcv,
+ .err_handler = gre_err,
+};
-static inline void ecn_decapsulate(u8 tos, struct sk_buff *skb)
+/* Number of successful gre_init() calls not yet balanced by gre_exit(). The
+ * protocol handler is registered while this is non-zero. */
+static int gre_ports;
+/* Takes one reference on the GRE protocol handler, registering it with
+ * gre_cisco_register() when this is the first reference.
+ *
+ * Returns 0 on success or the error from gre_cisco_register(). On failure
+ * no reference is held, so the caller must not call gre_exit() and a later
+ * gre_init() will try to register again. */
+static int gre_init(void)
{
- if (INET_ECN_is_ce(tos)) {
- __be16 protocol = skb->protocol;
- unsigned int nw_header = skb_network_header(skb) - skb->data;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
- return;
+ int err;
- protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- nw_header += VLAN_HLEN;
- }
+ gre_ports++;
+ if (gre_ports > 1)
+ return 0;
- if (protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, nw_header
- + sizeof(struct iphdr))))
- return;
+ err = gre_cisco_register(&gre_protocol);
+ if (err) {
+ pr_warn("cannot register gre protocol handler\n");
+ /* Drop the reference taken above: leaving the count raised
+ * would make the next caller skip registration and report
+ * success with no handler installed. */
+ gre_ports--;
+ }
- IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, nw_header
- + sizeof(struct ipv6hdr))))
- return;
-
- IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
- + skb->data));
- }
-#endif
- }
+ return err;
}
-static struct sk_buff *handle_gso(struct sk_buff *skb)
+/* Drops one reference on the GRE protocol handler and unregisters it with
+ * gre_cisco_unregister() once the count is no longer positive. */
+static void gre_exit(void)
{
- if (skb_is_gso(skb)) {
- struct sk_buff *nskb = skb_gso_segment(skb, 0);
-
- dev_kfree_skb(skb);
- return nskb;
- }
+ /* Only the last user tears the handler down. */
+ if (--gre_ports <= 0)
- return skb;
+ gre_cisco_unregister(&gre_protocol);
}
-static int handle_csum_offload(struct sk_buff *skb)
-{
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- return skb_checksum_help(skb);
- else {
- skb->ip_summed = CHECKSUM_NONE;
- return 0;
- }
-}
-
-/* Called with rcu_read_lock. */
-static void gre_err(struct sk_buff *skb, u32 info)
+/* Returns the port name, which gre_create() copies into the vport's private
+ * area. */
+static const char *gre_get_name(const struct vport *vport)
{
- struct vport *vport;
- const struct mutable_config *mutable;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
-
- struct iphdr *iph;
- __be16 flags;
- __be32 key;
- int tunnel_hdr_len, tot_hdr_len;
- unsigned int orig_mac_header;
- unsigned int orig_nw_header;
-
- if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED)
- return;
-
- /* The mimimum size packet that we would actually be able to process:
- * encapsulating IP header, minimum GRE header, Ethernet header,
- * inner IPv4 header. */
- if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION +
- ETH_HLEN + sizeof(struct iphdr)))
- return;
-
- iph = (struct iphdr *)skb->data;
-
- tunnel_hdr_len = parse_gre_header(iph, &flags, &key);
- if (tunnel_hdr_len < 0)
- return;
-
- vport = find_port(iph->saddr, iph->daddr, key, FIND_PORT_ANY, &mutable);
- if (!vport)
- return;
-
- /* Packets received by this function were previously sent by us, so
- * any comparisons should be to the output values, not the input.
- * However, it's not really worth it to have a hash table based on
- * output keys (especially since ICMP error handling of tunneled packets
- * isn't that reliable anyways). Therefore, we do a lookup based on the
- * out key as if it were the in key and then check to see if the input
- * and output keys are the same. */
- if (mutable->port_config.in_key != mutable->port_config.out_key)
- return;
-
- if (!!(mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
- !!(mutable->port_config.flags & GRE_F_OUT_KEY_ACTION))
- return;
-
- if ((mutable->port_config.flags & GRE_F_OUT_CSUM) && !(flags & GRE_CSUM))
- return;
-
- tunnel_hdr_len += iph->ihl << 2;
-
- orig_mac_header = skb_mac_header(skb) - skb->data;
- orig_nw_header = skb_network_header(skb) - skb->data;
- skb_set_mac_header(skb, tunnel_hdr_len);
-
- tot_hdr_len = tunnel_hdr_len + ETH_HLEN;
-
- skb->protocol = eth_hdr(skb)->h_proto;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- tot_hdr_len += VLAN_HLEN;
- skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- }
-
- skb_set_network_header(skb, tot_hdr_len);
- mtu -= tot_hdr_len;
-
- if (skb->protocol == htons(ETH_P_IP))
- tot_hdr_len += sizeof(struct iphdr);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- tot_hdr_len += sizeof(struct ipv6hdr);
-#endif
- else
- goto out;
-
- if (!pskb_may_pull(skb, tot_hdr_len))
- goto out;
-
- if (skb->protocol == htons(ETH_P_IP)) {
- if (mtu < IP_MIN_MTU) {
- if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
- mtu = IP_MIN_MTU;
- else
- goto out;
- }
-
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (mtu < IPV6_MIN_MTU) {
- unsigned int packet_length = sizeof(struct ipv6hdr) +
- ntohs(ipv6_hdr(skb)->payload_len);
-
- if (packet_length >= IPV6_MIN_MTU
- || ntohs(ipv6_hdr(skb)->payload_len) == 0)
- mtu = IPV6_MIN_MTU;
- else
- goto out;
- }
- }
-#endif
-
- __pskb_pull(skb, tunnel_hdr_len);
- send_frag_needed(vport, mutable, skb, mtu, key);
- skb_push(skb, tunnel_hdr_len);
-
-out:
- skb_set_mac_header(skb, orig_mac_header);
- skb_set_network_header(skb, orig_nw_header);
- skb->protocol = htons(ETH_P_IP);
+ const char *name = vport_priv(vport);
+
+ return name;
}
-/* Called with rcu_read_lock. */
-static int gre_rcv(struct sk_buff *skb)
+/* Creates the GRE vport for the network namespace of parms->dp.
+ *
+ * Takes a reference on the protocol handler with gre_init(), refuses with
+ * -EEXIST if the namespace already has a gre vport published, allocates the
+ * vport with an IFNAMSIZ-byte private area holding a NUL-terminated copy of
+ * parms->name (truncated to IFNAMSIZ - 1 characters if longer), and
+ * publishes it in ovs_net->vport_net.gre_vport.
+ *
+ * Returns the new vport, or an ERR_PTR() on failure. Failures after
+ * gre_init() succeeded drop the handler reference again with gre_exit(). */
+static struct vport *gre_create(const struct vport_parms *parms)
{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct ovs_net *ovs_net;
struct vport *vport;
- const struct mutable_config *mutable;
- int hdr_len;
- struct iphdr *iph;
- __be16 flags;
- __be32 key;
-
- if (!pskb_may_pull(skb, GRE_HEADER_SECTION + ETH_HLEN))
- goto error;
-
- if (!check_checksum(skb))
- goto error;
-
- iph = ip_hdr(skb);
-
- hdr_len = parse_gre_header(iph, &flags, &key);
- if (hdr_len < 0)
- goto error;
+ char *name;
+ int err;
- vport = find_port(iph->daddr, iph->saddr, key, FIND_PORT_ANY, &mutable);
- if (!vport) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
- goto error;
- }
+ err = gre_init();
+ if (err)
+ return ERR_PTR(err);
- if ((mutable->port_config.flags & GRE_F_IN_CSUM) && !(flags & GRE_CSUM)) {
- vport_record_error(vport, VPORT_E_RX_CRC);
+ ovs_net = net_generic(net, ovs_net_id);
+ if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
+ vport = ERR_PTR(-EEXIST);
goto error;
}
- if (!pskb_pull(skb, hdr_len) || !pskb_may_pull(skb, ETH_HLEN)) {
- vport_record_error(vport, VPORT_E_RX_ERROR);
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+ if (IS_ERR(vport))
goto error;
- }
-
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
- skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
- skb_dst_drop(skb);
- nf_reset(skb);
- secpath_reset(skb);
- skb_reset_network_header(skb);
-
- ecn_decapsulate(iph->tos, skb);
-
- if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
- OVS_CB(skb)->tun_id = key;
- else
- OVS_CB(skb)->tun_id = 0;
-
- skb_push(skb, ETH_HLEN);
- compute_ip_summed(skb, false);
-
- vport_receive(vport, skb);
-
- return 0;
+ /* strncpy() leaves the buffer unterminated when the source fills it,
+ * and gre_get_name() returns this buffer as a C string, so copy at
+ * most IFNAMSIZ - 1 bytes and terminate explicitly. */
+ name = vport_priv(vport);
+ strncpy(name, parms->name, IFNAMSIZ - 1);
+ name[IFNAMSIZ - 1] = '\0';
+ rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
+ return vport;
error:
- kfree_skb(skb);
- return 0;
+ gre_exit();
+ return vport;
}
-static int build_packet(struct vport *vport, const struct mutable_config *mutable,
- struct iphdr *iph, struct rtable *rt, int max_headroom,
- int mtu, struct sk_buff *skb)
+/* Tears down the GRE vport: clears the namespace's gre_vport pointer, queues
+ * the vport for deferred freeing and drops its reference on the protocol
+ * handler. */
+static void gre_tnl_destroy(struct vport *vport)
{
- int err;
- struct iphdr *new_iph;
- int orig_len = skb->len;
- __be16 frag_off = iph->frag_off;
-
- skb = check_headroom(skb, max_headroom);
- if (unlikely(IS_ERR(skb)))
- goto error;
-
- err = handle_csum_offload(skb);
- if (err)
- goto error_free;
-
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *old_iph = ip_hdr(skb);
+ struct ovs_net *ovs_net = net_generic(ovs_dp_get_net(vport->dp),
+ ovs_net_id);
- if ((old_iph->frag_off & htons(IP_DF)) &&
- mtu < ntohs(old_iph->tot_len)) {
- if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
- goto error_free;
- }
-
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- unsigned int packet_length = skb->len - ETH_HLEN
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
-
- /* IPv6 requires PMTUD if the packet is above the minimum MTU. */
- if (packet_length > IPV6_MIN_MTU)
- frag_off = htons(IP_DF);
-
- if (mtu < packet_length) {
- if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
- goto error_free;
- }
- }
-#endif
-
- skb_reset_transport_header(skb);
- new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
- skb_reset_network_header(skb);
-
- memcpy(new_iph, iph, sizeof(struct iphdr));
- new_iph->frag_off = frag_off;
- ip_select_ident(new_iph, &rt->u.dst, NULL);
-
- create_gre_header(skb, mutable);
-
- /* Allow our local IP stack to fragment the outer packet even if the
- * DF bit is set as a last resort. */
- skb->local_df = 1;
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags = 0;
-
- err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0))
- return orig_len;
- else {
- vport_record_error(vport, VPORT_E_TX_ERROR);
- return 0;
- }
-
-error_free:
- kfree_skb(skb);
-error:
- vport_record_error(vport, VPORT_E_TX_DROPPED);
+ /* Clear the published pointer first, then free and release. */
- return 0;
+ RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
}
/*
 * gre_send - transmit a packet through the GRE vport.
 *
 * The skb must carry a tunnel key in OVS_CB(skb)->tun_key; without one the
 * function returns -EINVAL. The GRE header length is computed from the
 * key's tun_flags by ip_gre_calc_hlen() and the packet is handed to
 * __send(), whose return value is passed back to the caller.
 *
 * NOTE(review): __send() is defined outside this section; the two zero
 * arguments occupy the positions where gre64_send() passes its sequence
 * value and its TUNNEL_* flags.
 */
static int gre_send(struct vport *vport, struct sk_buff *skb)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- const struct mutable_config *mutable = rcu_dereference(gre_vport->mutable);
-
- struct iphdr *old_iph;
- int orig_len;
- struct iphdr iph;
- struct rtable *rt;
- int max_headroom;
- int mtu;
-
- /* Validate the protocol headers before we try to use them. */
- if (skb->protocol == htons(ETH_P_8021Q)) {
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
- goto error_free;
-
- skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- skb_set_network_header(skb, VLAN_ETH_HLEN);
- }
-
- if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
- + sizeof(struct iphdr) - skb->data)))
- skb->protocol = 0;
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
- + sizeof(struct ipv6hdr) - skb->data)))
- skb->protocol = 0;
- }
-#endif
- old_iph = ip_hdr(skb);
-
- iph.tos = mutable->port_config.tos;
- if (mutable->port_config.flags & GRE_F_TOS_INHERIT) {
- if (skb->protocol == htons(ETH_P_IP))
- iph.tos = old_iph->tos;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
-#endif
- }
- iph.tos = ecn_encapsulate(iph.tos, skb);
-
- {
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = mutable->port_config.daddr,
- .saddr = mutable->port_config.saddr,
- .tos = RT_TOS(iph.tos) } },
- .proto = IPPROTO_GRE };
-
- if (ip_route_output_key(&init_net, &rt, &fl))
- goto error_free;
- }
-
- iph.ttl = mutable->port_config.ttl;
- if (mutable->port_config.flags & GRE_F_TTL_INHERIT) {
- if (skb->protocol == htons(ETH_P_IP))
- iph.ttl = old_iph->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- iph.ttl = ipv6_hdr(skb)->hop_limit;
-#endif
- }
- if (!iph.ttl)
- iph.ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
-
- iph.frag_off = (mutable->port_config.flags & GRE_F_PMTUD) ? htons(IP_DF) : 0;
- if (iph.frag_off)
- mtu = dst_mtu(&rt->u.dst)
- - ETH_HLEN
- - mutable->tunnel_hlen
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
- else
- mtu = mutable->mtu;
-
- if (skb->protocol == htons(ETH_P_IP)) {
- iph.frag_off |= old_iph->frag_off & htons(IP_DF);
- mtu = max(mtu, IP_MIN_MTU);
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- mtu = max(mtu, IPV6_MIN_MTU);
-#endif
-
- iph.version = 4;
- iph.ihl = sizeof(struct iphdr) >> 2;
- iph.protocol = IPPROTO_GRE;
- iph.daddr = rt->rt_dst;
- iph.saddr = rt->rt_src;
-
- nf_reset(skb);
- secpath_reset(skb);
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->u.dst);
-
- /* If we are doing GSO on a pskb it is better to make sure that the
- * headroom is correct now. We will only have to copy the portion in
- * the linear data area and GSO will preserve headroom when it creates
- * the segments. This is particularly beneficial on Xen where we get
- * lots of GSO pskbs. Conversely, we delay copying if it is just to
- * get our own writable clone because GSO may do the copy for us. */
- max_headroom = LL_RESERVED_SPACE(rt->u.dst.dev) + rt->u.dst.header_len
- + mutable->tunnel_hlen;
-
- if (skb_headroom(skb) < max_headroom) {
- skb = check_headroom(skb, max_headroom);
- if (unlikely(IS_ERR(skb))) {
- vport_record_error(vport, VPORT_E_TX_DROPPED);
- goto error;
- }
- }
-
- forward_ip_summed(skb);
-
- if (unlikely(vswitch_skb_checksum_setup(skb)))
- goto error_free;
-
- skb = handle_gso(skb);
- if (unlikely(IS_ERR(skb))) {
- vport_record_error(vport, VPORT_E_TX_DROPPED);
- goto error;
- }
+ int hlen;
- /* Process GSO segments. Try to do any work for the entire packet that
- * doesn't involve actually writing to it before this point. */
- orig_len = 0;
- do {
- struct sk_buff *next_skb = skb->next;
- skb->next = NULL;
-
- orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);
-
- skb = next_skb;
- } while (skb);
/* Without a tunnel key on the skb there is nothing to encapsulate with. */
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
- return orig_len;
/* The GRE header length is derived from the flags in the tunnel key. */
+ hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
-error_free:
- kfree_skb(skb);
- vport_record_error(vport, VPORT_E_TX_ERROR);
-error:
- return 0;
/* Plain GRE passes zero for both trailing __send() arguments. */
+ return __send(vport, skb, hlen, 0, 0);
}
-static struct net_protocol gre_protocol_handlers = {
- .handler = gre_rcv,
- .err_handler = gre_err,
/*
 * vport operations table for the OVS_VPORT_TYPE_GRE port type.
 * gre_create and gre_get_name are defined outside this section of the file;
 * gre_send is the transmit handler defined just above.
 */
+const struct vport_ops ovs_gre_vport_ops = {
+ .type = OVS_VPORT_TYPE_GRE,
+ .create = gre_create,
+ .destroy = gre_tnl_destroy,
+ .get_name = gre_get_name,
+ .send = gre_send,
};
-static int gre_init(void)
-{
- int err;
-
- err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
- if (err)
- printk(KERN_WARNING "openvswitch: cannot register gre protocol handler\n");
-
- return err;
-}
-
-static void gre_exit(void)
-{
- tbl_destroy(port_table, NULL);
- inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
-}
-
-static int set_config(const struct vport *cur_vport,
- struct mutable_config *mutable, const void __user *uconfig)
-{
- const struct vport *old_vport;
- const struct mutable_config *old_mutable;
- int port_type;
-
- if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct gre_port_config)))
- return -EFAULT;
-
- if (mutable->port_config.daddr == 0)
- return -EINVAL;
-
- if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) {
- port_type = FIND_PORT_MATCH;
- mutable->port_config.in_key = 0;
- } else
- port_type = FIND_PORT_KEY;
-
- old_vport = find_port(mutable->port_config.saddr,
- mutable->port_config.daddr,
- mutable->port_config.in_key, port_type,
- &old_mutable);
-
- if (old_vport && old_vport != cur_vport)
- return -EEXIST;
-
- if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
- mutable->port_config.out_key = 0;
-
- mutable->tunnel_hlen = sizeof(struct iphdr) + GRE_HEADER_SECTION;
-
- if (mutable->port_config.flags & GRE_F_OUT_CSUM)
- mutable->tunnel_hlen += GRE_HEADER_SECTION;
-
- if (mutable->port_config.out_key ||
- mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
- mutable->tunnel_hlen += GRE_HEADER_SECTION;
-
- return 0;
-}
-
-static struct vport *gre_create(const char *name, const void __user *config)
+/* GRE64 vport. */
+/*
+ * gre64_create - create the GRE64 vport for the datapath's network namespace.
+ *
+ * Calls gre_init() first and returns ERR_PTR(err) if that fails. If the
+ * namespace already has a GRE64 vport published, returns ERR_PTR(-EEXIST).
+ * Otherwise allocates a vport with IFNAMSIZ bytes of private data, stores
+ * the port name there (always NUL-terminated, truncated to IFNAMSIZ - 1
+ * characters if necessary) and publishes the vport in
+ * ovs_net->vport_net.gre64_vport.
+ *
+ * Every failure after gre_init() succeeded goes through the error label,
+ * which calls gre_exit() before returning the ERR_PTR value.
+ */
+static struct vport *gre64_create(const struct vport_parms *parms)
{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct ovs_net *ovs_net;
struct vport *vport;
- struct gre_vport *gre_vport;
int err;
+ char *name;
- vport = vport_alloc(sizeof(struct gre_vport), &gre_vport_ops);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto error;
- }
-
- gre_vport = gre_vport_priv(vport);
-
- strcpy(gre_vport->name, name);
-
- gre_vport->mutable = kmalloc(sizeof(struct mutable_config), GFP_KERNEL);
- if (!gre_vport->mutable) {
- err = -ENOMEM;
- goto error_free_vport;
- }
-
- vport_gen_rand_ether_addr(gre_vport->mutable->eth_addr);
- gre_vport->mutable->mtu = ETH_DATA_LEN;
-
- err = set_config(NULL, gre_vport->mutable, config);
- if (err)
- goto error_free_mutable;
-
- err = add_port(vport);
+ err = gre_init();
if (err)
- goto error_free_mutable;
-
- return vport;
-
-error_free_mutable:
- kfree(gre_vport->mutable);
-error_free_vport:
- vport_free(vport);
-error:
- return ERR_PTR(err);
-}
-
-static int gre_modify(struct vport *vport, const void __user *config)
-{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- struct mutable_config *mutable;
- int err;
- int update_hash = 0;
+ return ERR_PTR(err);
- mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
- if (!mutable) {
- err = -ENOMEM;
+ ovs_net = net_generic(net, ovs_net_id);
+ /* Only one GRE64 vport may exist per network namespace. */
+ if (ovsl_dereference(ovs_net->vport_net.gre64_vport)) {
+ vport = ERR_PTR(-EEXIST);
goto error;
}
- err = set_config(vport, mutable, config);
- if (err)
- goto error_free;
-
- /* Only remove the port from the hash table if something that would
- * affect the lookup has changed. */
- if (gre_vport->mutable->port_config.saddr != mutable->port_config.saddr ||
- gre_vport->mutable->port_config.daddr != mutable->port_config.daddr ||
- gre_vport->mutable->port_config.in_key != mutable->port_config.in_key ||
- (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
- (mutable->port_config.flags & GRE_F_IN_KEY_MATCH))
- update_hash = 1;
-
-
- /* This update is not atomic but the lookup uses the config, which
- * serves as an inherent double check. */
- if (update_hash) {
- err = del_port(vport);
- if (err)
- goto error_free;
- }
-
- assign_config_rcu(vport, mutable);
-
- if (update_hash) {
- err = add_port(vport);
- if (err)
- goto error_free;
- }
-
- return 0;
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre64_vport_ops, parms);
+ if (IS_ERR(vport))
+ goto error;
-error_free:
- kfree(mutable);
+ /* strncpy() leaves the destination unterminated when the source fills
+ * it, so copy at most IFNAMSIZ - 1 bytes and terminate explicitly. */
+ name = vport_priv(vport);
+ strncpy(name, parms->name, IFNAMSIZ - 1);
+ name[IFNAMSIZ - 1] = '\0';
+ rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport);
+ return vport;
error:
- return err;
-}
-
-static void free_port(struct rcu_head *rcu)
-{
- struct gre_vport *gre_vport = container_of(rcu, struct gre_vport, rcu);
-
- kfree(gre_vport->mutable);
- vport_free(gre_vport_to_vport(gre_vport));
+ gre_exit();
+ return vport;
}
-static int gre_destroy(struct vport *vport)
+/*
+ * gre64_tnl_destroy - tear down the GRE64 vport of a network namespace.
+ *
+ * Clears the namespace's published gre64_vport pointer, hands the vport to
+ * ovs_vport_deferred_free() and calls gre_exit(), balancing the gre_init()
+ * call made when the vport was created.
+ */
+static void gre64_tnl_destroy(struct vport *vport)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- int port_type;
- const struct mutable_config *old_mutable;
-
- /* Do a hash table lookup to make sure that the port exists. It should
- * exist but might not if a modify failed earlier. */
- if (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
- port_type = FIND_PORT_MATCH;
- else
- port_type = FIND_PORT_KEY;
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct ovs_net *ovs_net;
- if (vport == find_port(gre_vport->mutable->port_config.saddr,
- gre_vport->mutable->port_config.daddr,
- gre_vport->mutable->port_config.in_key, port_type, &old_mutable))
- del_port(vport);
+ ovs_net = net_generic(net, ovs_net_id);
- call_rcu(&gre_vport->rcu, free_port);
-
- return 0;
+ /* Storing NULL publishes no new data to readers, so the ordering
+ * provided by rcu_assign_pointer() is not needed; this is the same
+ * form used to clear the plain GRE vport pointer earlier in the file. */
+ RCU_INIT_POINTER(ovs_net->vport_net.gre64_vport, NULL);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
}
-static int gre_set_mtu(struct vport *vport, int mtu)
/*
 * be64_get_high32 - return the most-significant 32 bits of a big-endian
 * 64-bit value, still in big-endian byte order.
 *
 * The value is never byte-swapped; only the half of the in-register
 * representation that holds the first four bytes is selected, and which
 * half that is depends on the CPU's endianness.
 */
+static __be32 be64_get_high32(__be64 x)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- struct mutable_config *mutable;
-
- mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
- if (!mutable)
- return -ENOMEM;
-
- mutable->mtu = mtu;
- assign_config_rcu(vport, mutable);
-
- return 0;
+#ifdef __BIG_ENDIAN
/* Big-endian CPU: the high word is the upper half of the register. */
+ return (__force __be32)((__force u64)x >> 32);
+#else
/* Otherwise (little-endian CPU) the first four bytes of the value sit in
 * the low half of the register, which the truncating cast keeps. */
+ return (__force __be32)x;
+#endif
}
-static int gre_set_addr(struct vport *vport, const unsigned char *addr)
/*
 * gre64_send - transmit a packet through the GRE64 vport.
 *
 * The skb must carry a tunnel key in OVS_CB(skb)->tun_key; without one the
 * function returns -EINVAL. The header length always covers the base GRE
 * header, the key and the sequence number, plus one more section when the
 * key's tun_flags contain TUNNEL_CSUM. The upper 32 bits of the 64-bit
 * tun_id are passed to __send() as the sequence value together with
 * TUNNEL_KEY | TUNNEL_SEQ; __send()'s return value is passed back.
 *
 * NOTE(review): __send() is defined outside this section; presumably it
 * places the lower 32 bits of tun_id in the GRE key field - confirm there.
 */
+static int gre64_send(struct vport *vport, struct sk_buff *skb)
{
- struct gre_vport *gre_vport = gre_vport_priv(vport);
- struct mutable_config *mutable;
-
- mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
- if (!mutable)
- return -ENOMEM;
-
- memcpy(mutable->eth_addr, addr, ETH_ALEN);
- assign_config_rcu(vport, mutable);
-
- return 0;
-}
+ int hlen = GRE_HEADER_SECTION + /* GRE Hdr */
+ GRE_HEADER_SECTION + /* GRE Key */
+ GRE_HEADER_SECTION; /* GRE SEQ */
+ __be32 seq;
/* Without a tunnel key on the skb there is nothing to encapsulate with. */
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
-static const char *gre_get_name(const struct vport *vport)
-{
- const struct gre_vport *gre_vport = gre_vport_priv(vport);
- return gre_vport->name;
-}
/* A requested checksum takes one additional GRE header section. */
+ if (OVS_CB(skb)->tun_key->tun_flags & TUNNEL_CSUM)
+ hlen += GRE_HEADER_SECTION;
-static const unsigned char *gre_get_addr(const struct vport *vport)
-{
- const struct gre_vport *gre_vport = gre_vport_priv(vport);
- return rcu_dereference(gre_vport->mutable)->eth_addr;
/* The upper half of the 64-bit tunnel id travels as the sequence value. */
+ seq = be64_get_high32(OVS_CB(skb)->tun_key->tun_id);
+ return __send(vport, skb, hlen, seq, (TUNNEL_KEY|TUNNEL_SEQ));
}
-static int gre_get_mtu(const struct vport *vport)
-{
- const struct gre_vport *gre_vport = gre_vport_priv(vport);
- return rcu_dereference(gre_vport->mutable)->mtu;
-}
-
-struct vport_ops gre_vport_ops = {
- .type = "gre",
- .flags = VPORT_F_GEN_STATS | VPORT_F_TUN_ID,
- .init = gre_init,
- .exit = gre_exit,
- .create = gre_create,
- .modify = gre_modify,
- .destroy = gre_destroy,
- .set_mtu = gre_set_mtu,
- .set_addr = gre_set_addr,
/*
 * vport operations table for the OVS_VPORT_TYPE_GRE64 port type.
 * Create, destroy and send are GRE64-specific; name lookup reuses
 * gre_get_name, the same handler the plain GRE table uses.
 */
+const struct vport_ops ovs_gre64_vport_ops = {
+ .type = OVS_VPORT_TYPE_GRE64,
+ .create = gre64_create,
+ .destroy = gre64_tnl_destroy,
.get_name = gre_get_name,
- .get_addr = gre_get_addr,
- .get_dev_flags = vport_gen_get_dev_flags,
- .is_running = vport_gen_is_running,
- .get_operstate = vport_gen_get_operstate,
- .get_mtu = gre_get_mtu,
- .send = gre_send,
+ .send = gre64_send,
};
+#endif