- * Significant portions of this file may be copied from parts of the Linux
- * kernel, by Linus Torvalds and others.
- */
-
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
-#include <linux/in_route.h>
-#include <linux/jhash.h>
-#include <linux/kernel.h>
-#include <linux/version.h>
-#include <linux/workqueue.h>
-
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/icmp.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-#include <net/ipv6.h>
-#endif
-#include <net/route.h>
-#include <net/xfrm.h>
-
-#include "actions.h"
-#include "checksum.h"
-#include "datapath.h"
-#include "table.h"
-#include "tunnel.h"
-#include "vport.h"
-#include "vport-generic.h"
-#include "vport-internal_dev.h"
-
-#ifdef NEED_CACHE_TIMEOUT
-/*
- * On kernels where we can't quickly detect changes in the rest of the system
- * we use an expiration time to invalidate the cache. A shorter expiration
- * reduces the length of time that we may potentially blackhole packets while
- * a longer time increases performance by reducing the frequency that the
- * cache needs to be rebuilt. A variety of factors may cause the cache to be
- * invalidated before the expiration time but this is the maximum. The time
- * is expressed in jiffies.
- */
-#define MAX_CACHE_EXP HZ
-#endif
-
-/*
- * Interval to check for and remove caches that are no longer valid. Caches
- * are checked for validity before they are used for packet encapsulation and
- * old caches are removed at that time. However, if no packets are sent through
- * the tunnel then the cache will never be destroyed. Since it holds
- * references to a number of system objects, the cache will continue to use
- * system resources by not allowing those objects to be destroyed. The cache
- * cleaner is periodically run to free invalid caches. It does not
- * significantly affect system performance. A lower interval will release
- * resources faster but will itself consume resources by requiring more frequent
- * checks. A longer interval may result in messages being printed to the kernel
- * message buffer about unreleased resources. The interval is expressed in
- * jiffies.
- */
-#define CACHE_CLEANER_INTERVAL (5 * HZ)
-
-#define CACHE_DATA_ALIGN 16
-
-static struct tbl __rcu *port_table __read_mostly;
-
-static void cache_cleaner(struct work_struct *work);
-static DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);
-
-/*
- * These are just used as an optimization: they don't require any kind of
- * synchronization because we could have just as easily read the value before
- * the port change happened.
- */
-static unsigned int key_local_remote_ports __read_mostly;
-static unsigned int key_remote_ports __read_mostly;
-static unsigned int local_remote_ports __read_mostly;
-static unsigned int remote_ports __read_mostly;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
-#define rt_dst(rt) (rt->dst)
-#else
-#define rt_dst(rt) (rt->u.dst)
-#endif
-
/* Recover the vport that embeds the given tunnel private data. */
static inline struct vport *tnl_vport_to_vport(const struct tnl_vport *priv)
{
	return vport_from_priv(priv);
}
-
-static inline struct tnl_vport *tnl_vport_table_cast(const struct tbl_node *node)
-{
- return container_of(node, struct tnl_vport, tbl_node);
-}
-
/* This is analogous to rtnl_dereference for the tunnel cache. It checks that
 * cache_lock is held, so it is only for update side code.  Readers use plain
 * rcu_dereference() instead.
 */
static inline struct tnl_cache *cache_dereference(struct tnl_vport *tnl_vport)
{
	return rcu_dereference_protected(tnl_vport->cache,
					 lockdep_is_held(&tnl_vport->cache_lock));
}
-
/* (Re)arm the periodic cache cleaner to run CACHE_CLEANER_INTERVAL jiffies
 * from now. */
static inline void schedule_cache_cleaner(void)
{
	schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL);
}
-
-static void free_cache(struct tnl_cache *cache)
-{
- if (!cache)
- return;
-
- flow_put(cache->flow);
- ip_rt_put(cache->rt);
- kfree(cache);
-}
-
-static void free_config_rcu(struct rcu_head *rcu)
-{
- struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu);
- kfree(c);
-}
-
-static void free_cache_rcu(struct rcu_head *rcu)
-{
- struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu);
- free_cache(c);
-}
-
-static void assign_config_rcu(struct vport *vport,
- struct tnl_mutable_config *new_config)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct tnl_mutable_config *old_config;
-
- old_config = rtnl_dereference(tnl_vport->mutable);
- rcu_assign_pointer(tnl_vport->mutable, new_config);
- call_rcu(&old_config->rcu, free_config_rcu);
-}
-
-static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct tnl_cache *old_cache;
-
- old_cache = cache_dereference(tnl_vport);
- rcu_assign_pointer(tnl_vport->cache, new_cache);
-
- if (old_cache)
- call_rcu(&old_cache->rcu, free_cache_rcu);
-}
-
-static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
-{
- if (mutable->flags & TNL_F_IN_KEY_MATCH) {
- if (mutable->saddr)
- return &local_remote_ports;
- else
- return &remote_ports;
- } else {
- if (mutable->saddr)
- return &key_local_remote_ports;
- else
- return &key_remote_ports;
- }
-}
-
/* Search key for the port table.  'mutable' is an output: port_cmp() stores
 * the config pointer it dereferenced so that a successful lookup can return
 * it without a second dereference. */
struct port_lookup_key {
	const struct tnl_mutable_config *mutable;
	__be64 key;		/* Tunnel key (0 for flow-based lookups). */
	u32 tunnel_type;
	__be32 saddr;		/* Local address (0 matches wildcard ports). */
	__be32 daddr;		/* Remote address. */
};
-
-/*
- * Modifies 'target' to store the rcu_dereferenced pointer that was used to do
- * the comparision.
- */
-static int port_cmp(const struct tbl_node *node, void *target)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_table_cast(node);
- struct port_lookup_key *lookup = target;
-
- lookup->mutable = rcu_dereference_rtnl(tnl_vport->mutable);
-
- return (lookup->mutable->tunnel_type == lookup->tunnel_type &&
- lookup->mutable->daddr == lookup->daddr &&
- lookup->mutable->in_key == lookup->key &&
- lookup->mutable->saddr == lookup->saddr);
-}
-
/*
 * Hash every field of the lookup key except 'mutable'.  The 64-bit tunnel
 * key is folded in as its high and low 32-bit halves.  The __force casts
 * only silence sparse's endianness warnings about feeding __be values to
 * jhash; no byte swapping is intended.
 */
static u32 port_hash(struct port_lookup_key *k)
{
	u32 x = jhash_3words((__force u32)k->saddr, (__force u32)k->daddr,
			     k->tunnel_type, 0);
	return jhash_2words((__force u64)k->key >> 32, (__force u32)k->key, x);
}
-
-static u32 mutable_hash(const struct tnl_mutable_config *mutable)
-{
- struct port_lookup_key lookup;
-
- lookup.saddr = mutable->saddr;
- lookup.daddr = mutable->daddr;
- lookup.key = mutable->in_key;
- lookup.tunnel_type = mutable->tunnel_type;
-
- return port_hash(&lookup);
-}
-
-static void check_table_empty(void)
-{
- struct tbl *old_table = rtnl_dereference(port_table);
-
- if (tbl_count(old_table) == 0) {
- cancel_delayed_work_sync(&cache_cleaner_wq);
- rcu_assign_pointer(port_table, NULL);
- tbl_deferred_destroy(old_table, NULL);
- }
-}
-
-static int add_port(struct vport *vport)
-{
- struct tbl *cur_table = rtnl_dereference(port_table);
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- int err;
-
- if (!port_table) {
- struct tbl *new_table;
-
- new_table = tbl_create(TBL_MIN_BUCKETS);
- if (!new_table)
- return -ENOMEM;
-
- rcu_assign_pointer(port_table, new_table);
- schedule_cache_cleaner();
-
- } else if (tbl_count(cur_table) > tbl_n_buckets(cur_table)) {
- struct tbl *new_table;
-
- new_table = tbl_expand(cur_table);
- if (IS_ERR(new_table))
- return PTR_ERR(new_table);
-
- rcu_assign_pointer(port_table, new_table);
- tbl_deferred_destroy(cur_table, NULL);
- }
-
- err = tbl_insert(rtnl_dereference(port_table), &tnl_vport->tbl_node,
- mutable_hash(rtnl_dereference(tnl_vport->mutable)));
- if (err) {
- check_table_empty();
- return err;
- }
-
- (*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;
-
- return 0;
-}
-
/*
 * Re-hash 'vport' for 'new_mutable' and swap in the new configuration,
 * keeping the per-pool port counters in step.  Caller must hold RTNL.
 * Returns 0 on success or a negative errno.
 */
static int move_port(struct vport *vport, struct tnl_mutable_config *new_mutable)
{
	int err;
	struct tbl *cur_table = rtnl_dereference(port_table);
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	u32 hash;

	hash = mutable_hash(new_mutable);
	/* Same hash bucket: no table movement needed, only the config swap. */
	if (hash == tnl_vport->tbl_node.hash)
		goto table_updated;

	/*
	 * Ideally we should make this move atomic to avoid having gaps in
	 * finding tunnels or the possibility of failure. However, if we do
	 * find a tunnel it will always be consistent.
	 */
	err = tbl_remove(cur_table, &tnl_vport->tbl_node);
	if (err)
		return err;

	err = tbl_insert(cur_table, &tnl_vport->tbl_node, hash);
	if (err) {
		/* The port is now out of the table entirely; drop its count
		 * from the old configuration's pool and possibly destroy
		 * the (now empty) table. */
		(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
		check_table_empty();
		return err;
	}

table_updated:
	/* Move the port's count from the old config's pool to the new one;
	 * they may differ if the flags or local address changed. */
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
	assign_config_rcu(vport, new_mutable);
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;

	return 0;
}
-
/*
 * Remove 'vport' from the port table, destroying the table if it is now
 * empty and dropping the port's pool counter.  Caller must hold RTNL.
 * Returns 0 on success or a negative errno.
 */
static int del_port(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	int err;

	err = tbl_remove(rtnl_dereference(port_table), &tnl_vport->tbl_node);
	if (err)
		return err;

	check_table_empty();
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;

	return 0;
}
-
/*
 * Look up the tunnel port that should receive a packet with the given
 * outer addresses, key, and tunnel type.
 *
 * Exact-key (TNL_T_KEY_EXACT) matches are tried before flow-based
 * (TNL_T_KEY_MATCH) ones, and within each, ports bound to a specific
 * local address before wildcard-saddr ports.  The file-scope pool
 * counters let us skip hash lookups for configurations with no ports.
 *
 * On success, stores the rcu-dereferenced config in '*mutable' (filled in
 * by port_cmp()) and returns the vport; returns NULL if nothing matches.
 */
struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
			    int tunnel_type,
			    const struct tnl_mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct tbl *table = rcu_dereference_rtnl(port_table);
	struct tbl_node *tbl_node;

	if (unlikely(!table))
		return NULL;

	lookup.saddr = saddr;
	lookup.daddr = daddr;

	if (tunnel_type & TNL_T_KEY_EXACT) {
		lookup.key = key;
		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_MATCH;

		if (key_local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (key_remote_ports) {
			/* Wildcard the local address. */
			lookup.saddr = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;

			/* Restore for the flow-based pass below. */
			lookup.saddr = saddr;
		}
	}

	if (tunnel_type & TNL_T_KEY_MATCH) {
		/* Flow-based ports are keyed with in_key == 0. */
		lookup.key = 0;
		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_EXACT;

		if (local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (remote_ports) {
			lookup.saddr = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}
	}

	return NULL;

found:
	*mutable = lookup.mutable;
	return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
}
-
/*
 * If the outer tunnel IP header carries a Congestion Experienced mark,
 * propagate it into the inner IPv4/IPv6 header so the mark survives
 * decapsulation.  Non-IP inner packets are left untouched.
 */
static inline void ecn_decapsulate(struct sk_buff *skb)
{
	/* This is accessing the outer IP header of the tunnel, which we've
	 * already validated to be OK. skb->data is currently set to the start
	 * of the inner Ethernet header, and we've validated ETH_HLEN.
	 */
	if (unlikely(INET_ECN_is_ce(ip_hdr(skb)->tos))) {
		__be16 protocol = skb->protocol;

		skb_set_network_header(skb, ETH_HLEN);

		/* Step over a VLAN tag to find the encapsulated protocol. */
		if (skb->protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			skb_set_network_header(skb, VLAN_ETH_HLEN);
		}

		if (protocol == htons(ETH_P_IP)) {
			/* Ensure the inner IP header is linear before
			 * touching it. */
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce(ip_hdr(skb));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
#endif
	}
}
-
-/* Called with rcu_read_lock. */
-void tnl_rcv(struct vport *vport, struct sk_buff *skb)
-{
- /* Packets received by this function are in the following state:
- * - skb->data points to the inner Ethernet header.
- * - The inner Ethernet header is in the linear data area.
- * - skb->csum does not include the inner Ethernet header.
- * - The layer pointers point at the outer headers.
- */
-
- struct ethhdr *eh = (struct ethhdr *)skb->data;
-
- if (likely(ntohs(eh->h_proto) >= 1536))
- skb->protocol = eh->h_proto;
- else
- skb->protocol = htons(ETH_P_802_2);
-
- skb_dst_drop(skb);
- nf_reset(skb);
- secpath_reset(skb);
-
- ecn_decapsulate(skb);
- compute_ip_summed(skb, false);
-
- vport_receive(vport, skb);
-}
-
-static bool check_ipv4_address(__be32 addr)
-{
- if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
- || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
- return false;
-
- return true;
-}
-
/*
 * Decide whether an ICMP "fragmentation needed" reply may be generated in
 * response to 'skb' under the usual ICMP suppression rules.
 */
static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		/* The arithmetic turns the address of the ICMP type field
		 * (past the variable-length IP header) into an offset from
		 * skb->data, as skb_header_pointer() requires. */
		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		/* Types above NR_ICMP_TYPES are unknown; types up to
		 * ICMP_PARAMETERPROB are errors, except echo/echo reply. */
		if (*icmp_typep > NR_ICMP_TYPES
			|| (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}
-
/*
 * Append an IPv4 header, an ICMP "destination unreachable / fragmentation
 * needed" header advertising 'mtu', and the first 'payload_length' bytes
 * of the offending packet (starting at its IP header) to 'nskb'.  The
 * caller must have reserved enough tailroom for all three parts.
 */
static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP: reply goes back to the original sender, so swap the
	 * addresses. */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP: the advertised next-hop MTU rides in the unused field. */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	/* Checksum the ICMP header, then fold in the copied payload, then
	 * write the final folded sum back into the header. */
	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
 * Decide whether an ICMPv6 "packet too big" reply may be generated in
 * response to 'skb': no replies to multicast or unspecified sources,
 * unspecified destinations, or ICMPv6 error messages.
 */
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages.  Walk any extension
	 * headers to find the real upper-layer protocol first. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							icmp6_type),
						sizeof(icmp_type), &icmp_type);

		/* Only informational ICMPv6 messages (high bit of the type
		 * set) may trigger a reply. */
		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}
-
/*
 * Append an IPv6 header, an ICMPv6 "packet too big" header advertising
 * 'mtu', and the first 'payload_length' bytes of the offending packet
 * (starting at its IPv6 header) to 'nskb'.  The caller must have reserved
 * enough tailroom for all three parts.
 */
static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6: reply goes back to the original sender, so swap the
	 * addresses. */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	/* ICMPv6 checksum covers a pseudo-header of the addresses, length,
	 * and next-header value in addition to the message itself. */
	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
-#endif /* IPv6 */
-
/*
 * Synthesize an ICMP "fragmentation needed" (IPv4) or ICMPv6 "packet too
 * big" reply for the oversized 'skb' and inject it into the datapath as if
 * it had been received on 'vport'.
 *
 * Returns true if the caller should drop the original packet (a reply was
 * generated, or the ICMP rules forbid one).  Returns false if sending is
 * still allowed: the MTU is below the protocol minimum, the allocation
 * failed, or (IPv6) the destination is multicast, where we have no address
 * to reply from and just fragment instead.
 */
bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable,
		     struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/*
		 * In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment.
		 */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate.  Size the reply: headers plus as much of the original
	 * packet as the protocol's reply-size cap (576 bytes for IPv4,
	 * IPV6_MIN_MTU for IPv6) and the tunnel MTU allow. */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		total_length = min_t(unsigned int, header_length +
						   payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
						  payload_length, IPV6_MIN_MTU);
	}
#endif

	total_length = min(total_length, mutable->mtu);
	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN: reply goes back toward the original source MAC,
	 * preserving any VLAN tag. */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	}
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/*
	 * Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet. If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets.
	 */
	if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
	    (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
		OVS_CB(nskb)->tun_id = flow_key;

	compute_ip_summed(nskb, false);
	vport_receive(vport, nskb);

	return true;
}
-
/*
 * Validate 'skb' against the tunnel's path MTU and compute the DF flag to
 * place in the outer header into '*frag_offp'.
 *
 * Returns true if the packet may be sent.  Returns false (and zeroes
 * '*frag_offp') if the packet exceeded the MTU and an ICMP reply was
 * synthesized by tnl_frag_needed(), in which case the caller should drop
 * the original packet.
 */
static bool check_mtu(struct sk_buff *skb,
		      struct vport *vport,
		      const struct tnl_mutable_config *mutable,
		      const struct rtable *rt, __be16 *frag_offp)
{
	int mtu;
	__be16 frag_off;

	/* With PMTUD enabled, set DF on the outer header and enforce the
	 * route MTU minus the encapsulation overhead; otherwise only the
	 * configured tunnel MTU applies. */
	frag_off = (mutable->flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
	if (frag_off)
		mtu = dst_mtu(&rt_dst(rt))
			- ETH_HLEN
			- mutable->tunnel_hlen
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
	else
		mtu = mutable->mtu;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *old_iph = ip_hdr(skb);

		/* Honor the inner packet's DF bit on the outer header. */
		frag_off |= old_iph->frag_off & htons(IP_DF);
		mtu = max(mtu, IP_MIN_MTU);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto drop;
		}
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		unsigned int packet_length = skb->len - ETH_HLEN
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);

		mtu = max(mtu, IPV6_MIN_MTU);

		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
		if (packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (mtu < packet_length) {
			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto drop;
		}
	}
#endif

	*frag_offp = frag_off;
	return true;

drop:
	*frag_offp = 0;
	return false;
}
-
/*
 * Build the outer IPv4 header for this tunnel into 'header', then hand off
 * to the tunnel protocol's build_header() for the bytes that follow it.
 * A configured TTL of zero means "use the route's hop-limit metric".
 */
static void create_tunnel_header(const struct vport *vport,
				 const struct tnl_mutable_config *mutable,
				 const struct rtable *rt, void *header)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct iphdr *iph = header;

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = htons(IP_DF);
	iph->protocol = tnl_vport->tnl_ops->ipproto;
	iph->tos = mutable->tos;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->ttl = mutable->ttl;
	if (!iph->ttl)
		iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);

	/* Protocol-specific header goes immediately after the IP header. */
	tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}
-
-static inline void *get_cached_header(const struct tnl_cache *cache)
-{
- return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
-}
-
/*
 * True if 'cache' can still be used for encapsulation: it has not timed
 * out (on kernels where we can't detect changes directly), the IPv4 route
 * generation and hardware-header sequence are unchanged (where available),
 * the port's configuration sequence number still matches, and any flow
 * cached for an internal-device destination is still alive.
 */
static inline bool check_cache_valid(const struct tnl_cache *cache,
				     const struct tnl_mutable_config *mutable)
{
	return cache &&
#ifdef NEED_CACHE_TIMEOUT
		time_before(jiffies, cache->expiration) &&
#endif
#ifdef HAVE_RT_GENID
		atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
#endif
#ifdef HAVE_HH_SEQ
		rt_dst(cache->rt).hh->hh_lock.sequence == cache->hh_seq &&
#endif
		mutable->seq == cache->mutable_seq &&
		(!is_internal_dev(rt_dst(cache->rt).dev) ||
		(cache->flow && !cache->flow->dead));
}
-
-static int cache_cleaner_cb(struct tbl_node *tbl_node, void *aux)
-{
- struct tnl_vport *tnl_vport = tnl_vport_table_cast(tbl_node);
- const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
- const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache);
-
- if (cache && !check_cache_valid(cache, mutable) &&
- spin_trylock_bh(&tnl_vport->cache_lock)) {
- assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL);
- spin_unlock_bh(&tnl_vport->cache_lock);
- }
-
- return 0;
-}
-
/*
 * Delayed-work handler: sweep the port table and drop any header cache
 * that is no longer valid, then re-arm for the next interval.  When the
 * last port goes away, check_table_empty() cancels this work
 * synchronously.
 */
static void cache_cleaner(struct work_struct *work)
{
	schedule_cache_cleaner();

	rcu_read_lock();
	tbl_foreach(rcu_dereference(port_table), cache_cleaner_cb, NULL);
	rcu_read_unlock();
}