net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/config.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/sched.h>
 100 #include <linux/kernel.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <linux/in.h>
 105 #include <linux/tcp.h>
 106 #include <linux/udp.h>
 107 #include <linux/if_arp.h>
 108 #include <linux/mroute.h>
 109 #include <linux/init.h>
 110 #include <linux/netfilter_ipv4.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/protocol.h>
 116 #include <net/ipip.h>
 117 #include <net/inet_ecn.h>
 118 #include <net/xfrm.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) ((addr^(addr>>4))&0xF)
 122
 123 static int ipip_fb_tunnel_init(struct net_device *dev);
 124 static int ipip_tunnel_init(struct net_device *dev);
 125 static void ipip_tunnel_setup(struct net_device *dev);
 126
 127 static struct net_device *ipip_fb_tunnel_dev;
 128
 129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 132 static struct ip_tunnel *tunnels_wc[1];
 133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 134
 135 static rwlock_t ipip_lock = RW_LOCK_UNLOCKED;
 136
 137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
 138 {
 139         unsigned h0 = HASH(remote);
 140         unsigned h1 = HASH(local);
 141         struct ip_tunnel *t;
 142
 143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 144                 if (local == t->parms.iph.saddr &&
 145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 146                         return t;
 147         }
 148         for (t = tunnels_r[h0]; t; t = t->next) {
 149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 150                         return t;
 151         }
 152         for (t = tunnels_l[h1]; t; t = t->next) {
 153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 154                         return t;
 155         }
 156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 157                 return t;
 158         return NULL;
 159 }
 160
 161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 162 {
 163         u32 remote = t->parms.iph.daddr;
 164         u32 local = t->parms.iph.saddr;
 165         unsigned h = 0;
 166         int prio = 0;
 167
 168         if (remote) {
 169                 prio |= 2;
 170                 h ^= HASH(remote);
 171         }
 172         if (local) {
 173                 prio |= 1;
 174                 h ^= HASH(local);
 175         }
 176         return &tunnels[prio][h];
 177 }
 178
 179
 180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 181 {
 182         struct ip_tunnel **tp;
 183
 184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 185                 if (t == *tp) {
 186                         write_lock_bh(&ipip_lock);
 187                         *tp = t->next;
 188                         write_unlock_bh(&ipip_lock);
 189                         break;
 190                 }
 191         }
 192 }
 193
 194 static void ipip_tunnel_link(struct ip_tunnel *t)
 195 {
 196         struct ip_tunnel **tp = ipip_bucket(t);
 197
 198         t->next = *tp;
 199         write_lock_bh(&ipip_lock);
 200         *tp = t;
 201         write_unlock_bh(&ipip_lock);
 202 }
 203
 204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 205 {
 206         u32 remote = parms->iph.daddr;
 207         u32 local = parms->iph.saddr;
 208         struct ip_tunnel *t, **tp, *nt;
 209         struct net_device *dev;
 210         unsigned h = 0;
 211         int prio = 0;
 212         char name[IFNAMSIZ];
 213
 214         if (remote) {
 215                 prio |= 2;
 216                 h ^= HASH(remote);
 217         }
 218         if (local) {
 219                 prio |= 1;
 220                 h ^= HASH(local);
 221         }
 222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 224                         return t;
 225         }
 226         if (!create)
 227                 return NULL;
 228
 229         if (parms->name[0])
 230                 strlcpy(name, parms->name, IFNAMSIZ);
 231         else {
 232                 int i;
 233                 for (i=1; i<100; i++) {
 234                         sprintf(name, "tunl%d", i);
 235                         if (__dev_get_by_name(name) == NULL)
 236                                 break;
 237                 }
 238                 if (i==100)
 239                         goto failed;
 240         }
 241
 242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 243         if (dev == NULL)
 244                 return NULL;
 245
 246         nt = dev->priv;
 247         SET_MODULE_OWNER(dev);
 248         dev->init = ipip_tunnel_init;
 249         dev->destructor = free_netdev;
 250         nt->parms = *parms;
 251
 252         if (register_netdevice(dev) < 0) {
 253                 free_netdev(dev);
 254                 goto failed;
 255         }
 256
 257         dev_hold(dev);
 258         ipip_tunnel_link(nt);
 259         /* Do not decrement MOD_USE_COUNT here. */
 260         return nt;
 261
 262 failed:
 263         return NULL;
 264 }
 265
 266 static void ipip_tunnel_uninit(struct net_device *dev)
 267 {
 268         if (dev == ipip_fb_tunnel_dev) {
 269                 write_lock_bh(&ipip_lock);
 270                 tunnels_wc[0] = NULL;
 271                 write_unlock_bh(&ipip_lock);
 272         } else
 273                 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
 274         dev_put(dev);
 275 }
 276
 277 static void ipip_err(struct sk_buff *skb, void *__unused)
 278 {
 279 #ifndef I_WISH_WORLD_WERE_PERFECT
 280
 281 /* It is not :-( All the routers (except for Linux) return only
 282    8 bytes of packet payload. It means, that precise relaying of
 283    ICMP in the real Internet is absolutely infeasible.
 284  */
 285         struct iphdr *iph = (struct iphdr*)skb->data;
 286         int type = skb->h.icmph->type;
 287         int code = skb->h.icmph->code;
 288         struct ip_tunnel *t;
 289
 290         switch (type) {
 291         default:
 292         case ICMP_PARAMETERPROB:
 293                 return;
 294
 295         case ICMP_DEST_UNREACH:
 296                 switch (code) {
 297                 case ICMP_SR_FAILED:
 298                 case ICMP_PORT_UNREACH:
 299                         /* Impossible event. */
 300                         return;
 301                 case ICMP_FRAG_NEEDED:
 302                         /* Soft state for pmtu is maintained by IP core. */
 303                         return;
 304                 default:
 305                         /* All others are translated to HOST_UNREACH.
 306                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 307                            I believe they are just ether pollution. --ANK
 308                          */
 309                         break;
 310                 }
 311                 break;
 312         case ICMP_TIME_EXCEEDED:
 313                 if (code != ICMP_EXC_TTL)
 314                         return;
 315                 break;
 316         }
 317
 318         read_lock(&ipip_lock);
 319         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 320         if (t == NULL || t->parms.iph.daddr == 0)
 321                 goto out;
 322         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 323                 goto out;
 324
 325         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 326                 t->err_count++;
 327         else
 328                 t->err_count = 1;
 329         t->err_time = jiffies;
 330 out:
 331         read_unlock(&ipip_lock);
 332         return;
 333 #else
 334         struct iphdr *iph = (struct iphdr*)dp;
 335         int hlen = iph->ihl<<2;
 336         struct iphdr *eiph;
 337         int type = skb->h.icmph->type;
 338         int code = skb->h.icmph->code;
 339         int rel_type = 0;
 340         int rel_code = 0;
 341         int rel_info = 0;
 342         struct sk_buff *skb2;
 343         struct flowi fl;
 344         struct rtable *rt;
 345
 346         if (len < hlen + sizeof(struct iphdr))
 347                 return;
 348         eiph = (struct iphdr*)(dp + hlen);
 349
 350         switch (type) {
 351         default:
 352                 return;
 353         case ICMP_PARAMETERPROB:
 354                 if (skb->h.icmph->un.gateway < hlen)
 355                         return;
 356
 357                 /* So... This guy found something strange INSIDE encapsulated
 358                    packet. Well, he is fool, but what can we do ?
 359                  */
 360                 rel_type = ICMP_PARAMETERPROB;
 361                 rel_info = skb->h.icmph->un.gateway - hlen;
 362                 break;
 363
 364         case ICMP_DEST_UNREACH:
 365                 switch (code) {
 366                 case ICMP_SR_FAILED:
 367                 case ICMP_PORT_UNREACH:
 368                         /* Impossible event. */
 369                         return;
 370                 case ICMP_FRAG_NEEDED:
 371                         /* And it is the only really necessary thing :-) */
 372                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
 373                         if (rel_info < hlen+68)
 374                                 return;
 375                         rel_info -= hlen;
 376                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 377                         if (rel_info > ntohs(eiph->tot_len))
 378                                 return;
 379                         break;
 380                 default:
 381                         /* All others are translated to HOST_UNREACH.
 382                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 383                            I believe, it is just ether pollution. --ANK
 384                          */
 385                         rel_type = ICMP_DEST_UNREACH;
 386                         rel_code = ICMP_HOST_UNREACH;
 387                         break;
 388                 }
 389                 break;
 390         case ICMP_TIME_EXCEEDED:
 391                 if (code != ICMP_EXC_TTL)
 392                         return;
 393                 break;
 394         }
 395
 396         /* Prepare fake skb to feed it to icmp_send */
 397         skb2 = skb_clone(skb, GFP_ATOMIC);
 398         if (skb2 == NULL)
 399                 return;
 400         dst_release(skb2->dst);
 401         skb2->dst = NULL;
 402         skb_pull(skb2, skb->data - (u8*)eiph);
 403         skb2->nh.raw = skb2->data;
 404
 405         /* Try to guess incoming interface */
 406         memset(&fl, 0, sizeof(fl));
 407         fl.fl4_daddr = eiph->saddr;
 408         fl.fl4_tos = RT_TOS(eiph->tos);
 409         fl.proto = IPPROTO_IPIP;
 410         if (ip_route_output_key(&rt, &key)) {
 411                 kfree_skb(skb2);
 412                 return;
 413         }
 414         skb2->dev = rt->u.dst.dev;
 415
 416         /* route "incoming" packet */
 417         if (rt->rt_flags&RTCF_LOCAL) {
 418                 ip_rt_put(rt);
 419                 rt = NULL;
 420                 fl.fl4_daddr = eiph->daddr;
 421                 fl.fl4_src = eiph->saddr;
 422                 fl.fl4_tos = eiph->tos;
 423                 if (ip_route_output_key(&rt, &fl) ||
 424                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 425                         ip_rt_put(rt);
 426                         kfree_skb(skb2);
 427                         return;
 428                 }
 429         } else {
 430                 ip_rt_put(rt);
 431                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 432                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 433                         kfree_skb(skb2);
 434                         return;
 435                 }
 436         }
 437
 438         /* change mtu on this route */
 439         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 440                 if (rel_info > dst_pmtu(skb2->dst)) {
 441                         kfree_skb(skb2);
 442                         return;
 443                 }
 444                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 445                 rel_info = htonl(rel_info);
 446         } else if (type == ICMP_TIME_EXCEEDED) {
 447                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
 448                 if (t->parms.iph.ttl) {
 449                         rel_type = ICMP_DEST_UNREACH;
 450                         rel_code = ICMP_HOST_UNREACH;
 451                 }
 452         }
 453
 454         icmp_send(skb2, rel_type, rel_code, rel_info);
 455         kfree_skb(skb2);
 456         return;
 457 #endif
 458 }
 459
 460 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 461 {
 462         struct iphdr *inner_iph = skb->nh.iph;
 463
 464         if (INET_ECN_is_ce(outer_iph->tos) &&
 465             INET_ECN_is_not_ce(inner_iph->tos))
 466                 IP_ECN_set_ce(inner_iph);
 467 }
 468
 469 static int ipip_rcv(struct sk_buff *skb)
 470 {
 471         struct iphdr *iph;
 472         struct ip_tunnel *tunnel;
 473
 474         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 475                 goto out;
 476
 477         iph = skb->nh.iph;
 478
 479         read_lock(&ipip_lock);
 480         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 481                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 482                         read_unlock(&ipip_lock);
 483                         kfree_skb(skb);
 484                         return 0;
 485                 }
 486
 487                 secpath_reset(skb);
 488
 489                 skb->mac.raw = skb->nh.raw;
 490                 skb->nh.raw = skb->data;
 491                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 492                 skb->protocol = htons(ETH_P_IP);
 493                 skb->pkt_type = PACKET_HOST;
 494
 495                 tunnel->stat.rx_packets++;
 496                 tunnel->stat.rx_bytes += skb->len;
 497                 skb->dev = tunnel->dev;
 498                 dst_release(skb->dst);
 499                 skb->dst = NULL;
 500                 nf_reset(skb);
 501                 ipip_ecn_decapsulate(iph, skb);
 502                 netif_rx(skb);
 503                 read_unlock(&ipip_lock);
 504                 return 0;
 505         }
 506         read_unlock(&ipip_lock);
 507
 508 out:
 509         return -1;
 510 }
 511
 512 /*
 513  *      This function assumes it is being called from dev_queue_xmit()
 514  *      and that skb is filled properly by that function.
 515  */
 516
 517 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 518 {
 519         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
 520         struct net_device_stats *stats = &tunnel->stat;
 521         struct iphdr  *tiph = &tunnel->parms.iph;
 522         u8     tos = tunnel->parms.iph.tos;
 523         u16    df = tiph->frag_off;
 524         struct rtable *rt;                      /* Route to the other host */
 525         struct net_device *tdev;                        /* Device to other host */
 526         struct iphdr  *old_iph = skb->nh.iph;
 527         struct iphdr  *iph;                     /* Our new IP header */
 528         int    max_headroom;                    /* The extra header space needed */
 529         u32    dst = tiph->daddr;
 530         int    mtu;
 531
 532         if (tunnel->recursion++) {
 533                 tunnel->stat.collisions++;
 534                 goto tx_error;
 535         }
 536
 537         if (skb->protocol != htons(ETH_P_IP))
 538                 goto tx_error;
 539
 540         if (tos&1)
 541                 tos = old_iph->tos;
 542
 543         if (!dst) {
 544                 /* NBMA tunnel */
 545                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 546                         tunnel->stat.tx_fifo_errors++;
 547                         goto tx_error;
 548                 }
 549                 if ((dst = rt->rt_gateway) == 0)
 550                         goto tx_error_icmp;
 551         }
 552
 553         {
 554                 struct flowi fl = { .oif = tunnel->parms.link,
 555                                     .nl_u = { .ip4_u =
 556                                               { .daddr = dst,
 557                                                 .saddr = tiph->saddr,
 558                                                 .tos = RT_TOS(tos) } },
 559                                     .proto = IPPROTO_IPIP };
 560                 if (ip_route_output_key(&rt, &fl)) {
 561                         tunnel->stat.tx_carrier_errors++;
 562                         goto tx_error_icmp;
 563                 }
 564         }
 565         tdev = rt->u.dst.dev;
 566
 567         if (tdev == dev) {
 568                 ip_rt_put(rt);
 569                 tunnel->stat.collisions++;
 570                 goto tx_error;
 571         }
 572
 573         if (tiph->frag_off)
 574                 mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
 575         else
 576                 mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
 577
 578         if (mtu < 68) {
 579                 tunnel->stat.collisions++;
 580                 ip_rt_put(rt);
 581                 goto tx_error;
 582         }
 583         if (skb->dst)
 584                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 585
 586         df |= (old_iph->frag_off&htons(IP_DF));
 587
 588         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 589                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 590                 ip_rt_put(rt);
 591                 goto tx_error;
 592         }
 593
 594         if (tunnel->err_count > 0) {
 595                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 596                         tunnel->err_count--;
 597                         dst_link_failure(skb);
 598                 } else
 599                         tunnel->err_count = 0;
 600         }
 601
 602         /*
 603          * Okay, now see if we can stuff it in the buffer as-is.
 604          */
 605         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 606
 607         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 608                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 609                 if (!new_skb) {
 610                         ip_rt_put(rt);
 611                         stats->tx_dropped++;
 612                         dev_kfree_skb(skb);
 613                         tunnel->recursion--;
 614                         return 0;
 615                 }
 616                 if (skb->sk)
 617                         skb_set_owner_w(new_skb, skb->sk);
 618                 dev_kfree_skb(skb);
 619                 skb = new_skb;
 620                 old_iph = skb->nh.iph;
 621         }
 622
 623         skb->h.raw = skb->nh.raw;
 624         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 625         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 626         dst_release(skb->dst);
 627         skb->dst = &rt->u.dst;
 628
 629         /*
 630          *      Push down and install the IPIP header.
 631          */
 632
 633         iph                     =       skb->nh.iph;
 634         iph->version            =       4;
 635         iph->ihl                =       sizeof(struct iphdr)>>2;
 636         iph->frag_off           =       df;
 637         iph->protocol           =       IPPROTO_IPIP;
 638         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 639         iph->daddr              =       rt->rt_dst;
 640         iph->saddr              =       rt->rt_src;
 641
 642         if ((iph->ttl = tiph->ttl) == 0)
 643                 iph->ttl        =       old_iph->ttl;
 644
 645         nf_reset(skb);
 646
 647         IPTUNNEL_XMIT();
 648         tunnel->recursion--;
 649         return 0;
 650
 651 tx_error_icmp:
 652         dst_link_failure(skb);
 653 tx_error:
 654         stats->tx_errors++;
 655         dev_kfree_skb(skb);
 656         tunnel->recursion--;
 657         return 0;
 658 }
 659
 660 static int
 661 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 662 {
 663         int err = 0;
 664         struct ip_tunnel_parm p;
 665         struct ip_tunnel *t;
 666
 667         switch (cmd) {
 668         case SIOCGETTUNNEL:
 669                 t = NULL;
 670                 if (dev == ipip_fb_tunnel_dev) {
 671                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 672                                 err = -EFAULT;
 673                                 break;
 674                         }
 675                         t = ipip_tunnel_locate(&p, 0);
 676                 }
 677                 if (t == NULL)
 678                         t = (struct ip_tunnel*)dev->priv;
 679                 memcpy(&p, &t->parms, sizeof(p));
 680                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 681                         err = -EFAULT;
 682                 break;
 683
 684         case SIOCADDTUNNEL:
 685         case SIOCCHGTUNNEL:
 686                 err = -EPERM;
 687                 if (!capable(CAP_NET_ADMIN))
 688                         goto done;
 689
 690                 err = -EFAULT;
 691                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 692                         goto done;
 693
 694                 err = -EINVAL;
 695                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 696                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 697                         goto done;
 698                 if (p.iph.ttl)
 699                         p.iph.frag_off |= htons(IP_DF);
 700
 701                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 702
 703                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 704                         if (t != NULL) {
 705                                 if (t->dev != dev) {
 706                                         err = -EEXIST;
 707                                         break;
 708                                 }
 709                         } else {
 710                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 711                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 712                                         err = -EINVAL;
 713                                         break;
 714                                 }
 715                                 t = (struct ip_tunnel*)dev->priv;
 716                                 ipip_tunnel_unlink(t);
 717                                 t->parms.iph.saddr = p.iph.saddr;
 718                                 t->parms.iph.daddr = p.iph.daddr;
 719                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 720                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 721                                 ipip_tunnel_link(t);
 722                                 netdev_state_change(dev);
 723                         }
 724                 }
 725
 726                 if (t) {
 727                         err = 0;
 728                         if (cmd == SIOCCHGTUNNEL) {
 729                                 t->parms.iph.ttl = p.iph.ttl;
 730                                 t->parms.iph.tos = p.iph.tos;
 731                                 t->parms.iph.frag_off = p.iph.frag_off;
 732                         }
 733                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 734                                 err = -EFAULT;
 735                 } else
 736                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 737                 break;
 738
 739         case SIOCDELTUNNEL:
 740                 err = -EPERM;
 741                 if (!capable(CAP_NET_ADMIN))
 742                         goto done;
 743
 744                 if (dev == ipip_fb_tunnel_dev) {
 745                         err = -EFAULT;
 746                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 747                                 goto done;
 748                         err = -ENOENT;
 749                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 750                                 goto done;
 751                         err = -EPERM;
 752                         if (t->dev == ipip_fb_tunnel_dev)
 753                                 goto done;
 754                         dev = t->dev;
 755                 }
 756                 err = unregister_netdevice(dev);
 757                 break;
 758
 759         default:
 760                 err = -EINVAL;
 761         }
 762
 763 done:
 764         return err;
 765 }
 766
 767 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 768 {
 769         return &(((struct ip_tunnel*)dev->priv)->stat);
 770 }
 771
 772 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 773 {
 774         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 775                 return -EINVAL;
 776         dev->mtu = new_mtu;
 777         return 0;
 778 }
 779
 780 static void ipip_tunnel_setup(struct net_device *dev)
 781 {
 782         SET_MODULE_OWNER(dev);
 783         dev->uninit             = ipip_tunnel_uninit;
 784         dev->hard_start_xmit    = ipip_tunnel_xmit;
 785         dev->get_stats          = ipip_tunnel_get_stats;
 786         dev->do_ioctl           = ipip_tunnel_ioctl;
 787         dev->change_mtu         = ipip_tunnel_change_mtu;
 788
 789         dev->type               = ARPHRD_TUNNEL;
 790         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 791         dev->mtu                = 1500 - sizeof(struct iphdr);
 792         dev->flags              = IFF_NOARP;
 793         dev->iflink             = 0;
 794         dev->addr_len           = 4;
 795 }
 796
 797 static int ipip_tunnel_init(struct net_device *dev)
 798 {
 799         struct net_device *tdev = NULL;
 800         struct ip_tunnel *tunnel;
 801         struct iphdr *iph;
 802
 803         tunnel = (struct ip_tunnel*)dev->priv;
 804         iph = &tunnel->parms.iph;
 805
 806         tunnel->dev = dev;
 807         strcpy(tunnel->parms.name, dev->name);
 808
 809         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 810         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 811
 812         if (iph->daddr) {
 813                 struct flowi fl = { .oif = tunnel->parms.link,
 814                                     .nl_u = { .ip4_u =
 815                                               { .daddr = iph->daddr,
 816                                                 .saddr = iph->saddr,
 817                                                 .tos = RT_TOS(iph->tos) } },
 818                                     .proto = IPPROTO_IPIP };
 819                 struct rtable *rt;
 820                 if (!ip_route_output_key(&rt, &fl)) {
 821                         tdev = rt->u.dst.dev;
 822                         ip_rt_put(rt);
 823                 }
 824                 dev->flags |= IFF_POINTOPOINT;
 825         }
 826
 827         if (!tdev && tunnel->parms.link)
 828                 tdev = __dev_get_by_index(tunnel->parms.link);
 829
 830         if (tdev) {
 831                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 832                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 833         }
 834         dev->iflink = tunnel->parms.link;
 835
 836         return 0;
 837 }
 838
 839 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 840 {
 841         struct ip_tunnel *tunnel = dev->priv;
 842         struct iphdr *iph = &tunnel->parms.iph;
 843
 844         tunnel->dev = dev;
 845         strcpy(tunnel->parms.name, dev->name);
 846
 847         iph->version            = 4;
 848         iph->protocol           = IPPROTO_IPIP;
 849         iph->ihl                = 5;
 850
 851         dev_hold(dev);
 852         tunnels_wc[0]           = tunnel;
 853         return 0;
 854 }
 855
 856 static struct xfrm_tunnel ipip_handler = {
 857         .handler        =       ipip_rcv,
 858         .err_handler    =       ipip_err,
 859 };
 860
 861 static char banner[] __initdata =
 862         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 863
 864 static int __init ipip_init(void)
 865 {
 866         int err;
 867
 868         printk(banner);
 869
 870         if (xfrm4_tunnel_register(&ipip_handler) < 0) {
 871                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 872                 return -EAGAIN;
 873         }
 874
 875         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 876                                            "tunl0",
 877                                            ipip_tunnel_setup);
 878         if (!ipip_fb_tunnel_dev) {
 879                 err = -ENOMEM;
 880                 goto fail;
 881         }
 882
 883         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 884
 885         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 886             goto fail;
 887  out:
 888         return err;
 889  fail:
 890         xfrm4_tunnel_deregister(&ipip_handler);
 891         free_netdev(ipip_fb_tunnel_dev);
 892         goto out;
 893 }
 894
 895 static void __exit ipip_fini(void)
 896 {
 897         if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
 898                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 899
 900         unregister_netdev(ipip_fb_tunnel_dev);
 901 }
 902
 903 module_init(ipip_init);
 904 module_exit(ipip_fini);
 905 MODULE_LICENSE("GPL");