datapath/tunnel.h

   1 /*
   2  * Copyright (c) 2007-2012 Nicira, Inc.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of version 2 of the GNU General Public
   6  * License as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful, but
   9  * WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public License
  14  * along with this program; if not, write to the Free Software
  15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16  * 02110-1301, USA
  17  */
  18
  19 #ifndef TUNNEL_H
  20 #define TUNNEL_H 1
  21
  22 #include <linux/version.h>
  23 #include <net/net_namespace.h>
  24 #include <net/netns/generic.h>
  25
  26 #include "flow.h"
  27 #include "openvswitch/tunnel.h"
  28 #include "vport.h"
  29
  30 /*
  31  * The absolute minimum fragment size.  Note that there are many other
  32  * definitions of the minimum MTU.
      *
      * NOTE(review): 68 is presumably the IPv4 minimum MTU in bytes (RFC 791);
      * confirm before relying on the unit.
  33  */
  34 #define IP_MIN_MTU 68
  35
  36 /*
  37  * One of these goes in struct tnl_ops (its tunnel_type member) and in the
  38  * tunnel_type argument of ovs_tnl_find_port().  These values are in the
  39  * same namespace as other TNL_T_* values, so only the least significant
  40  * 10 bits are available to define protocol identifiers.
  41  */
  42 #define TNL_T_PROTO_GRE         0
  43 #define TNL_T_PROTO_GRE64       1
  44 #define TNL_T_PROTO_CAPWAP      2
  45
  46 /* These flags are only needed when calling ovs_tnl_find_port().  They use
      * bits 10 and 11, directly above the 10 bits available for the
      * TNL_T_PROTO_* identifiers. */
  47 #define TNL_T_KEY_EXACT         (1 << 10)
  48 #define TNL_T_KEY_MATCH         (1 << 11)
  49
  50 /* Private flags not exposed to userspace in this form.  They use bits 16
      * and 17 and share the TNL_F_* prefix with the public flags collected in
      * TNL_F_PUBLIC. */
  51 #define TNL_F_IN_KEY_MATCH      (1 << 16) /* Store the key in tun_id to
  52                                            * match in flow table. */
  53 #define TNL_F_OUT_KEY_ACTION    (1 << 17) /* Get the key from a SET_TUNNEL
  54                                            * action. */
  55
  56 /* Mask of all public tunnel flags.  The individual TNL_F_* bits named
      * here are not defined in this header (presumably they come from
      * openvswitch/tunnel.h -- confirm). */
  57 #define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \
  58                       TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
  59                       TNL_F_HDR_CACHE | TNL_F_IPSEC)
  60
  61 /**
  62  * struct port_lookup_key - Tunnel port key, used as hash table key.
  63  * @in_key: Key to match on input, 0 for wildcard.
  64  * @net: Network namespace of the port.  Only declared when CONFIG_NET_NS is
      * defined; access it through port_key_get_net()/port_key_set_net().
  65  * @saddr: IPv4 source address to match, 0 to accept any source address.
  66  * @daddr: IPv4 destination of tunnel.
  67  * @tunnel_type: Set of TNL_T_* flags that define lookup.
      *
      * PORT_KEY_LEN measures this struct up to and including @tunnel_type, so a
      * member placed after @tunnel_type would not be counted by it.
  68  */
  69 struct port_lookup_key {
  70         __be64 in_key;
  71 #ifdef CONFIG_NET_NS
  72         struct net *net;
  73 #endif
  74         __be32 saddr;
  75         __be32 daddr;
  76         u32    tunnel_type;
  77 };
  78
     /*
      * Length in bytes of struct port_lookup_key from its start through the end
      * of the tunnel_type member (any trailing padding is excluded).
      */
  79 #define PORT_KEY_LEN    (offsetof(struct port_lookup_key, tunnel_type) + \
  80                          FIELD_SIZEOF(struct port_lookup_key, tunnel_type))
  81
  82 static inline struct net *port_key_get_net(const struct port_lookup_key *key)
  83 {
             /*
              * Returns the network namespace recorded in @key, as reported by
              * read_pnet().  key->net is only declared under CONFIG_NET_NS.
              * NOTE(review): this relies on read_pnet() not evaluating its
              * argument when namespaces are disabled -- confirm for the
              * supported kernels.
              */
  84         return read_pnet(&key->net);
  85 }
  86
  87 static inline void port_key_set_net(struct port_lookup_key *key, struct net *net)
  88 {
             /*
              * Records @net as the network namespace of @key via write_pnet().
              * key->net is only declared under CONFIG_NET_NS.
              * NOTE(review): this relies on write_pnet() not evaluating its
              * first argument when namespaces are disabled -- confirm for the
              * supported kernels.
              */
  89         write_pnet(&key->net, net);
  90 }
  91
  92 /**
  93  * struct tnl_mutable_config - modifiable configuration for a tunnel.
  94  * @key: Used as key for tunnel port.  Configured via OVS_TUNNEL_ATTR_*
  95  * attributes.
  96  * @rcu: RCU callback head for deferred destruction.
  97  * @seq: Sequence number for distinguishing configuration versions.  A
      * struct tnl_cache records the value it was generated from in its
      * mutable_seq member.
  98  * @tunnel_hlen: Tunnel header length.
  99  * @eth_addr: Source address for packets generated by tunnel itself
 100  * (e.g. ICMP fragmentation needed messages).
 101  * @out_key: Key to use on output, 0 if this tunnel has no fixed output key.
 102  * @flags: TNL_F_* flags.
 103  * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS.
 104  * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL.
      * @mlink: Multicast configuration.  NOTE(review): the exact meaning of the
      * value is not visible in this header -- confirm against its users.
 105  */
 106 struct tnl_mutable_config {
 107         struct port_lookup_key key;
 108         struct rcu_head rcu;
 109
 110         unsigned seq;
 111
 112         unsigned tunnel_hlen;
 113
 114         unsigned char eth_addr[ETH_ALEN];
 115
 116         /* Configured via OVS_TUNNEL_ATTR_* attributes. */
 117         __be64  out_key;
 118         u32     flags;
 119         u8      tos;
 120         u8      ttl;
 121
 122         /* Multicast configuration. */
 123         int     mlink;
 124 };
 125
 126 struct tnl_ops {
             /*
              * Per-protocol identifiers and callbacks for a tunnel type.  A
              * pointer to one of these is the last argument of ovs_tnl_create(),
              * and struct tnl_vport holds one in its tnl_ops member.
              */
 127         u32 tunnel_type;        /* Put the TNL_T_PROTO_* type in here. */
 128         u8 ipproto;             /* The IP protocol for the tunnel. */
 129
 130         /*
 131          * Returns the length of the tunnel header that will be added in
 132          * build_header() (i.e. excludes the IP header).  Returns a negative
 133          * error code if the configuration is invalid.  The argument is the
              * tunnel configuration to size the header for.
 134          */
 135         int (*hdr_len)(const struct tnl_mutable_config *);
 136
 137         /*
 138          * Builds the static portion of the tunnel header, which is stored in
 139          * the header cache.  In general the performance of this function is
 140          * not too important as we try to only call it when building the cache
 141          * so it is preferable to shift as much work as possible here.  However,
 142          * in some circumstances caching is disabled and this function will be
 143          * called for every packet, so try not to make it too slow.
 144          */
 145         void (*build_header)(const struct vport *,
 146                              const struct tnl_mutable_config *, void *header);
 147
 148         /*
 149          * Updates the cached header of a packet to match the actual packet
 150          * data.  Typical things that might need to be updated are length,
 151          * checksum, etc.  The IP header will have already been updated and this
 152          * is the final step before transmission.  Returns a linked list of
 153          * completed SKBs (multiple packets may be generated in the event
 154          * of fragmentation).
              *
              * NOTE(review): such a list is presumably what
              * ovs_tnl_free_linked_skbs() releases -- confirm.
 155          */
 156         struct sk_buff *(*update_header)(const struct vport *,
 157                                          const struct tnl_mutable_config *,
 158                                          struct dst_entry *, struct sk_buff *);
 159 };
 160
 161 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
 162 /*
 163  * On these kernels we have a fast mechanism to tell if the ARP cache for a
 164  * particular destination has changed.  Defining this enables the hh_seq
      * member of struct tnl_cache.
 165  */
 166 #define HAVE_HH_SEQ
 167 #endif
 168 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
 169 /*
 170  * On these kernels we have a fast mechanism to tell if the routing table
 171  * has changed.
 172  */
 173 #define HAVE_RT_GENID
 174 #endif
 175 #if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
 176 /*
      * If we can't detect all system changes directly we need to use a timeout.
      * Defining this enables the expiration member of struct tnl_cache and the
      * cache_exp_interval member of struct tnl_vport.
      */
 177 #define NEED_CACHE_TIMEOUT
 178 #endif
 179 struct tnl_cache {
             /*
              * Cached tunnel header for a port, together with the state used to
              * decide whether the cached data is still usable (configuration
              * sequence number, hardware header sequence number, expiration
              * time, route).  struct tnl_vport points to one of these through
              * its cache member.
              */
 180         struct rcu_head rcu;
 181
 182         int len;                /* Length of data to be memcpy'd from cache. */
 183         int hh_len;             /* Hardware hdr length, cached from hh_cache. */
 184
 185         /* Sequence number of mutable->seq from which this cache was
 186          * generated. */
 187         unsigned mutable_seq;
 188
 189 #ifdef HAVE_HH_SEQ
 190         /*
 191          * The sequence number from the seqlock protecting the hardware header
 192          * cache (in the ARP cache).  Since every write increments the counter
 193          * this gives us an easy way to tell if it has changed.
 194          */
 195         unsigned hh_seq;
 196 #endif
 197
 198 #ifdef NEED_CACHE_TIMEOUT
 199         /*
 200          * If we don't have direct mechanisms to detect all important changes in
 201          * the system fall back to an expiration time.  This expiration time
 202          * can be relatively short since at high rates there will be millions of
 203          * packets per second, so we'll still get plenty of benefit from the
 204          * cache.  Note that if something changes we may blackhole packets
 205          * until the expiration time (depending on what changed and the kernel
 206          * version we may be able to detect the change sooner).  Expiration is
 207          * expressed as a time in jiffies.
 208          */
 209         unsigned long expiration;
 210 #endif
 211
 212         /*
 213          * The routing table entry that is the result of looking up the tunnel
 214          * endpoints.  It also contains a sequence number (called a generation
 215          * ID) that can be compared to a global sequence to tell if the routing
 216          * table has changed (and therefore there is a potential that this
 217          * cached route has been invalidated).
 218          */
 219         struct rtable *rt;
 220
 221         /*
 222          * If the output device for tunnel traffic is an OVS internal device,
 223          * the flow of that datapath.  Since all tunnel traffic will have the
 224          * same headers this allows us to cache the flow lookup.  NULL if the
 225          * output device is not OVS or if there is no flow installed.
 226          */
 227         struct sw_flow *flow;
 228
 229         /* The cached header follows after padding for alignment. */
 230 };
 231
 232 struct tnl_vport {
             /*
              * Tunnel-specific state of a vport.  tnl_vport_priv() returns a
              * pointer to this struct for a given struct vport.
              */
 233         struct rcu_head rcu;
 234         struct hlist_node hash_node;    /* NOTE(review): presumably links the
                                              * port into the table keyed by
                                              * struct port_lookup_key. */
 235
 236         char name[IFNAMSIZ];
 237         const struct tnl_ops *tnl_ops;  /* Per-protocol operations. */
 238
 239         struct tnl_mutable_config __rcu *mutable;  /* RCU-annotated pointer to
                                                         * the tunnel configuration. */
 240
 241         /*
 242          * ID of last fragment sent (for tunnel protocols with direct support
 243          * for fragmentation).  If the protocol relies on IP fragmentation
 244          * then this is not needed.
 245          */
 246         atomic_t frag_id;
 247
 248         spinlock_t cache_lock;
 249         struct tnl_cache __rcu *cache;  /* Protected by RCU/cache_lock. */
 250
 251 #ifdef NEED_CACHE_TIMEOUT
 252         /*
 253          * If we must rely on expiration time to invalidate the cache, this is
 254          * the interval.  It is randomized within a range (defined by
 255          * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
 256          * by creation of a large number of tunnels at one time.
 257          */
 258         unsigned long cache_exp_interval;
 259 #endif
 260 };
 261
 262 struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
 263                              const struct tnl_ops *);
 264 void ovs_tnl_destroy(struct vport *);
 265
     /* Option accessors: set takes a netlink attribute, get takes an skb. */
 266 int ovs_tnl_set_options(struct vport *, struct nlattr *);
 267 int ovs_tnl_get_options(const struct vport *, struct sk_buff *);
 268
 269 int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
 270 const char *ovs_tnl_get_name(const struct vport *vport);
 271 const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
 272 int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
 273 void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
 274
     /*
      * ovs_tnl_find_port() takes the same pieces that make up struct
      * port_lookup_key (namespace, addresses, key, tunnel type).  tunnel_type
      * is a TNL_T_PROTO_* value, optionally combined with the TNL_T_KEY_*
      * lookup flags.
      * NOTE(review): what is stored through @mutable is not visible in this
      * header -- presumably the matched port's configuration; confirm.
      */
 275 struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
 276                                 __be64 key, int tunnel_type,
 277                                 const struct tnl_mutable_config **mutable);
 278 bool ovs_tnl_frag_needed(struct vport *vport,
 279                          const struct tnl_mutable_config *mutable,
 280                          struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
     /*
      * NOTE(review): presumably frees an skb list such as the one returned by
      * tnl_ops->update_header() -- confirm.
      */
 281 void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
 282
     /* NOTE(review): presumably module-wide setup and teardown -- confirm. */
 283 int ovs_tnl_init(void);
 284 void ovs_tnl_exit(void);
 285 static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
 286 {
             /*
              * Returns the private data of @vport, as given by vport_priv(),
              * typed as the tunnel-specific struct tnl_vport.
              */
 287         return vport_priv(vport);
 288 }
 289
 290 #endif /* tunnel.h */