vserver 2.0 rc7
[linux-2.6.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, it is now a truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
89 /*
90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95  
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/protocol.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119
/* Number of buckets in each hashed tunnel table (tunnels_r_l, tunnels_r, tunnels_l). */
#define HASH_SIZE  16

/*
 * Fold an address into a 4-bit bucket index (0 .. HASH_SIZE-1).
 *
 * The argument is fully parenthesized so that a compound expression such
 * as HASH(a | b) hashes the value the caller wrote instead of being torn
 * apart by operator precedence.  Note that the argument is still evaluated
 * twice, so it must not have side effects.
 */
#define HASH(addr) ((((addr))^((addr)>>4))&0xF)
122
123 static int ipip_fb_tunnel_init(struct net_device *dev);
124 static int ipip_tunnel_init(struct net_device *dev);
125 static void ipip_tunnel_setup(struct net_device *dev);
126
127 static struct net_device *ipip_fb_tunnel_dev;
128
129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
132 static struct ip_tunnel *tunnels_wc[1];
133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
134
135 static DEFINE_RWLOCK(ipip_lock);
136
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139         unsigned h0 = HASH(remote);
140         unsigned h1 = HASH(local);
141         struct ip_tunnel *t;
142
143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144                 if (local == t->parms.iph.saddr &&
145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146                         return t;
147         }
148         for (t = tunnels_r[h0]; t; t = t->next) {
149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = tunnels_l[h1]; t; t = t->next) {
153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157                 return t;
158         return NULL;
159 }
160
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163         u32 remote = t->parms.iph.daddr;
164         u32 local = t->parms.iph.saddr;
165         unsigned h = 0;
166         int prio = 0;
167
168         if (remote) {
169                 prio |= 2;
170                 h ^= HASH(remote);
171         }
172         if (local) {
173                 prio |= 1;
174                 h ^= HASH(local);
175         }
176         return &tunnels[prio][h];
177 }
178
179
180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
181 {
182         struct ip_tunnel **tp;
183
184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
185                 if (t == *tp) {
186                         write_lock_bh(&ipip_lock);
187                         *tp = t->next;
188                         write_unlock_bh(&ipip_lock);
189                         break;
190                 }
191         }
192 }
193
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196         struct ip_tunnel **tp = ipip_bucket(t);
197
198         t->next = *tp;
199         write_lock_bh(&ipip_lock);
200         *tp = t;
201         write_unlock_bh(&ipip_lock);
202 }
203
204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
205 {
206         u32 remote = parms->iph.daddr;
207         u32 local = parms->iph.saddr;
208         struct ip_tunnel *t, **tp, *nt;
209         struct net_device *dev;
210         unsigned h = 0;
211         int prio = 0;
212         char name[IFNAMSIZ];
213
214         if (remote) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218         if (local) {
219                 prio |= 1;
220                 h ^= HASH(local);
221         }
222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
224                         return t;
225         }
226         if (!create)
227                 return NULL;
228
229         if (parms->name[0])
230                 strlcpy(name, parms->name, IFNAMSIZ);
231         else {
232                 int i;
233                 for (i=1; i<100; i++) {
234                         sprintf(name, "tunl%d", i);
235                         if (__dev_get_by_name(name) == NULL)
236                                 break;
237                 }
238                 if (i==100)
239                         goto failed;
240         }
241
242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243         if (dev == NULL)
244                 return NULL;
245
246         nt = dev->priv;
247         SET_MODULE_OWNER(dev);
248         dev->init = ipip_tunnel_init;
249         nt->parms = *parms;
250
251         if (register_netdevice(dev) < 0) {
252                 free_netdev(dev);
253                 goto failed;
254         }
255
256         dev_hold(dev);
257         ipip_tunnel_link(nt);
258         /* Do not decrement MOD_USE_COUNT here. */
259         return nt;
260
261 failed:
262         return NULL;
263 }
264
265 static void ipip_tunnel_uninit(struct net_device *dev)
266 {
267         if (dev == ipip_fb_tunnel_dev) {
268                 write_lock_bh(&ipip_lock);
269                 tunnels_wc[0] = NULL;
270                 write_unlock_bh(&ipip_lock);
271         } else
272                 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
273         dev_put(dev);
274 }
275
/*
 * ICMP error handler for IPIP; installed as ipip_handler.err_handler.
 *
 * Default build (I_WISH_WORLD_WERE_PERFECT not defined):
 *   - Only ICMP_DEST_UNREACH and ICMP_TIME_EXCEEDED are acted on.
 *     DEST_UNREACH with code SR_FAILED, PORT_UNREACH or FRAG_NEEDED is
 *     ignored, as is TIME_EXCEEDED with any code other than ICMP_EXC_TTL.
 *   - The tunnel is looked up with the addresses of the IP header found at
 *     skb->data, swapped: its daddr is our remote, its saddr our local.
 *     NOTE(review): this presumes skb->data points at the quoted outer
 *     header of the packet we sent - confirm against the caller.
 *   - Errors for a tunnel without a fixed remote address, or TIME_EXCEEDED
 *     on a tunnel whose configured ttl is 0, are ignored.
 *   - Otherwise the tunnel's err_count is incremented (or restarted at 1
 *     once IPTUNNEL_ERR_TIMEO has elapsed since err_time) and err_time is
 *     refreshed.  ipip_tunnel_xmit() consumes err_count.
 *
 * The #else branch is the "precise relaying" variant.  It is not compiled
 * by default and refers to the names dp, len and key, none of which are
 * declared in this function.
 */
276 static void ipip_err(struct sk_buff *skb, void *__unused)
277 {
278 #ifndef I_WISH_WORLD_WERE_PERFECT
279
280 /* It is not :-( All the routers (except for Linux) return only
281    8 bytes of packet payload. It means, that precise relaying of
282    ICMP in the real Internet is absolutely infeasible.
283  */
284         struct iphdr *iph = (struct iphdr*)skb->data;
285         int type = skb->h.icmph->type;
286         int code = skb->h.icmph->code;
287         struct ip_tunnel *t;
288
            /* Filter: every "return" below drops the error silently; a
               "break" lets it through to the per-tunnel accounting. */
289         switch (type) {
290         default:
291         case ICMP_PARAMETERPROB:
292                 return;
293
294         case ICMP_DEST_UNREACH:
295                 switch (code) {
296                 case ICMP_SR_FAILED:
297                 case ICMP_PORT_UNREACH:
298                         /* Impossible event. */
299                         return;
300                 case ICMP_FRAG_NEEDED:
301                         /* Soft state for pmtu is maintained by IP core. */
302                         return;
303                 default:
304                         /* All others are translated to HOST_UNREACH.
305                            rfc2003 contains "deep thoughts" about NET_UNREACH,
306                            I believe they are just ether pollution. --ANK
307                          */
308                         break;
309                 }
310                 break;
311         case ICMP_TIME_EXCEEDED:
312                 if (code != ICMP_EXC_TTL)
313                         return;
314                 break;
315         }
316
            /* Lookup takes (remote, local): the header's destination is
               passed as remote and its source as local. */
317         read_lock(&ipip_lock);
318         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
319         if (t == NULL || t->parms.iph.daddr == 0)
320                 goto out;
321         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
322                 goto out;
323
            /* Count errors that arrive within IPTUNNEL_ERR_TIMEO of the
               previous one; otherwise start a new count. */
324         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
325                 t->err_count++;
326         else
327                 t->err_count = 1;
328         t->err_time = jiffies;
329 out:
330         read_unlock(&ipip_lock);
331         return;
            /* Alternative implementation, compiled only when
               I_WISH_WORLD_WERE_PERFECT is defined.  It uses dp, len and
               key, which are not declared in this function. */
332 #else
333         struct iphdr *iph = (struct iphdr*)dp;
334         int hlen = iph->ihl<<2;
335         struct iphdr *eiph;
336         int type = skb->h.icmph->type;
337         int code = skb->h.icmph->code;
338         int rel_type = 0;
339         int rel_code = 0;
340         int rel_info = 0;
341         struct sk_buff *skb2;
342         struct flowi fl;
343         struct rtable *rt;
344
345         if (len < hlen + sizeof(struct iphdr))
346                 return;
347         eiph = (struct iphdr*)(dp + hlen);
348
349         switch (type) {
350         default:
351                 return;
352         case ICMP_PARAMETERPROB:
353                 if (skb->h.icmph->un.gateway < hlen)
354                         return;
355
356                 /* So... This guy found something strange INSIDE encapsulated
357                    packet. Well, he is fool, but what can we do ?
358                  */
359                 rel_type = ICMP_PARAMETERPROB;
360                 rel_info = skb->h.icmph->un.gateway - hlen;
361                 break;
362
363         case ICMP_DEST_UNREACH:
364                 switch (code) {
365                 case ICMP_SR_FAILED:
366                 case ICMP_PORT_UNREACH:
367                         /* Impossible event. */
368                         return;
369                 case ICMP_FRAG_NEEDED:
370                         /* And it is the only really necessary thing :-) */
371                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
372                         if (rel_info < hlen+68)
373                                 return;
374                         rel_info -= hlen;
375                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
376                         if (rel_info > ntohs(eiph->tot_len))
377                                 return;
378                         break;
379                 default:
380                         /* All others are translated to HOST_UNREACH.
381                            rfc2003 contains "deep thoughts" about NET_UNREACH,
382                            I believe, it is just ether pollution. --ANK
383                          */
384                         rel_type = ICMP_DEST_UNREACH;
385                         rel_code = ICMP_HOST_UNREACH;
386                         break;
387                 }
388                 break;
389         case ICMP_TIME_EXCEEDED:
390                 if (code != ICMP_EXC_TTL)
391                         return;
392                 break;
393         }
394
395         /* Prepare fake skb to feed it to icmp_send */
396         skb2 = skb_clone(skb, GFP_ATOMIC);
397         if (skb2 == NULL)
398                 return;
399         dst_release(skb2->dst);
400         skb2->dst = NULL;
401         skb_pull(skb2, skb->data - (u8*)eiph);
402         skb2->nh.raw = skb2->data;
403
404         /* Try to guess incoming interface */
405         memset(&fl, 0, sizeof(fl));
406         fl.fl4_daddr = eiph->saddr;
407         fl.fl4_tos = RT_TOS(eiph->tos);
408         fl.proto = IPPROTO_IPIP;
409         if (ip_route_output_key(&rt, &key)) {
410                 kfree_skb(skb2);
411                 return;
412         }
413         skb2->dev = rt->u.dst.dev;
414
415         /* route "incoming" packet */
416         if (rt->rt_flags&RTCF_LOCAL) {
417                 ip_rt_put(rt);
418                 rt = NULL;
419                 fl.fl4_daddr = eiph->daddr;
420                 fl.fl4_src = eiph->saddr;
421                 fl.fl4_tos = eiph->tos;
422                 if (ip_route_output_key(&rt, &fl) ||
423                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
424                         ip_rt_put(rt);
425                         kfree_skb(skb2);
426                         return;
427                 }
428         } else {
429                 ip_rt_put(rt);
430                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
431                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
432                         kfree_skb(skb2);
433                         return;
434                 }
435         }
436
437         /* change mtu on this route */
438         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
439                 if (rel_info > dst_mtu(skb2->dst)) {
440                         kfree_skb(skb2);
441                         return;
442                 }
443                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
444                 rel_info = htonl(rel_info);
445         } else if (type == ICMP_TIME_EXCEEDED) {
446                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
447                 if (t->parms.iph.ttl) {
448                         rel_type = ICMP_DEST_UNREACH;
449                         rel_code = ICMP_HOST_UNREACH;
450                 }
451         }
452
453         icmp_send(skb2, rel_type, rel_code, rel_info);
454         kfree_skb(skb2);
455         return;
456 #endif
457 }
458
459 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
460 {
461         struct iphdr *inner_iph = skb->nh.iph;
462
463         if (INET_ECN_is_ce(outer_iph->tos))
464                 IP_ECN_set_ce(inner_iph);
465 }
466
467 static int ipip_rcv(struct sk_buff *skb)
468 {
469         struct iphdr *iph;
470         struct ip_tunnel *tunnel;
471
472         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
473                 goto out;
474
475         iph = skb->nh.iph;
476
477         read_lock(&ipip_lock);
478         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
479                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
480                         read_unlock(&ipip_lock);
481                         kfree_skb(skb);
482                         return 0;
483                 }
484
485                 secpath_reset(skb);
486
487                 skb->mac.raw = skb->nh.raw;
488                 skb->nh.raw = skb->data;
489                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
490                 skb->protocol = htons(ETH_P_IP);
491                 skb->pkt_type = PACKET_HOST;
492
493                 tunnel->stat.rx_packets++;
494                 tunnel->stat.rx_bytes += skb->len;
495                 skb->dev = tunnel->dev;
496                 dst_release(skb->dst);
497                 skb->dst = NULL;
498                 nf_reset(skb);
499                 ipip_ecn_decapsulate(iph, skb);
500                 netif_rx(skb);
501                 read_unlock(&ipip_lock);
502                 return 0;
503         }
504         read_unlock(&ipip_lock);
505
506 out:
507         return -1;
508 }
509
510 /*
511  *      This function assumes it is being called from dev_queue_xmit()
512  *      and that skb is filled properly by that function.
513  */
514
/*
 * hard_start_xmit callback: encapsulate an IPv4 packet in a new IPv4
 * header and send it towards the tunnel's remote endpoint.
 *
 * Steps, in order:
 *   1. Re-entry guard via tunnel->recursion (counted as a collision).
 *   2. Reject anything that is not ETH_P_IP.
 *   3. Pick the outer TOS and destination; with no configured remote
 *      address the gateway of the packet's own route is used.
 *   4. Route the outer packet; a route back through this same device is
 *      counted as a collision.
 *   5. Work out the MTU, update the inner route's PMTU, and answer
 *      DF-marked packets that do not fit with ICMP_FRAG_NEEDED.
 *   6. Report a link failure once for each recently recorded ICMP error
 *      (err_count / err_time, maintained by ipip_err()).
 *   7. Make sure there is headroom, push and fill in the outer header,
 *      and send through IPTUNNEL_XMIT().
 *
 * Always returns 0.  On every error path the skb is freed here and
 * tunnel->recursion is decremented again.
 */
515 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
516 {
517         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
518         struct net_device_stats *stats = &tunnel->stat;
519         struct iphdr  *tiph = &tunnel->parms.iph;
520         u8     tos = tunnel->parms.iph.tos;
521         u16    df = tiph->frag_off;
522         struct rtable *rt;                      /* Route to the other host */
523         struct net_device *tdev;                        /* Device to other host */
524         struct iphdr  *old_iph = skb->nh.iph;
525         struct iphdr  *iph;                     /* Our new IP header */
526         int    max_headroom;                    /* The extra header space needed */
527         u32    dst = tiph->daddr;
528         int    mtu;
529
            /* A non-zero counter means this tunnel is already inside
               xmit; every exit path below decrements it again. */
530         if (tunnel->recursion++) {
531                 tunnel->stat.collisions++;
532                 goto tx_error;
533         }
534
535         if (skb->protocol != htons(ETH_P_IP))
536                 goto tx_error;
537
            /* Bit 0 of the configured TOS means "take the TOS from the
               inner header". */
538         if (tos&1)
539                 tos = old_iph->tos;
540
541         if (!dst) {
542                 /* NBMA tunnel */
543                 if ((rt = (struct rtable*)skb->dst) == NULL) {
544                         tunnel->stat.tx_fifo_errors++;
545                         goto tx_error;
546                 }
547                 if ((dst = rt->rt_gateway) == 0)
548                         goto tx_error_icmp;
549         }
550
            /* Route lookup for the outer (encapsulating) packet. */
551         {
552                 struct flowi fl = { .oif = tunnel->parms.link,
553                                     .nl_u = { .ip4_u =
554                                               { .daddr = dst,
555                                                 .saddr = tiph->saddr,
556                                                 .tos = RT_TOS(tos) } },
557                                     .proto = IPPROTO_IPIP };
558                 if (ip_route_output_key(&rt, &fl)) {
559                         tunnel->stat.tx_carrier_errors++;
560                         goto tx_error_icmp;
561                 }
562         }
563         tdev = rt->u.dst.dev;
564
            /* The outer route leads back into this tunnel device. */
565         if (tdev == dev) {
566                 ip_rt_put(rt);
567                 tunnel->stat.collisions++;
568                 goto tx_error;
569         }
570
            /* With a configured frag_off the MTU is the outer route's MTU
               minus the header we add; otherwise the inner route's MTU
               (or the device MTU when the skb has no route) is used. */
571         if (tiph->frag_off)
572                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
573         else
574                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
575
576         if (mtu < 68) {
577                 tunnel->stat.collisions++;
578                 ip_rt_put(rt);
579                 goto tx_error;
580         }
581         if (skb->dst)
582                 skb->dst->ops->update_pmtu(skb->dst, mtu);
583
            /* The inner packet's DF bit is copied to the outer header. */
584         df |= (old_iph->frag_off&htons(IP_DF));
585
586         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
587                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
588                 ip_rt_put(rt);
589                 goto tx_error;
590         }
591
            /* Consume one recorded ICMP error per packet while the last
               error is recent; the packet is still transmitted. */
592         if (tunnel->err_count > 0) {
593                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
594                         tunnel->err_count--;
595                         dst_link_failure(skb);
596                 } else
597                         tunnel->err_count = 0;
598         }
599
600         /*
601          * Okay, now see if we can stuff it in the buffer as-is.
602          */
603         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
604
            /* Reallocate when there is too little headroom or the buffer
               is cloned or shared; old_iph is re-read from the new skb. */
605         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
606                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
607                 if (!new_skb) {
608                         ip_rt_put(rt);
609                         stats->tx_dropped++;
610                         dev_kfree_skb(skb);
611                         tunnel->recursion--;
612                         return 0;
613                 }
614                 if (skb->sk)
615                         skb_set_owner_w(new_skb, skb->sk);
616                 dev_kfree_skb(skb);
617                 skb = new_skb;
618                 old_iph = skb->nh.iph;
619         }
620
621         skb->h.raw = skb->nh.raw;
622         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
623         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
624         dst_release(skb->dst);
625         skb->dst = &rt->u.dst;
626
627         /*
628          *      Push down and install the IPIP header.
629          */
630
631         iph                     =       skb->nh.iph;
632         iph->version            =       4;
633         iph->ihl                =       sizeof(struct iphdr)>>2;
634         iph->frag_off           =       df;
635         iph->protocol           =       IPPROTO_IPIP;
636         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
637         iph->daddr              =       rt->rt_dst;
638         iph->saddr              =       rt->rt_src;
639
            /* A configured TTL of 0 means "inherit the inner TTL". */
640         if ((iph->ttl = tiph->ttl) == 0)
641                 iph->ttl        =       old_iph->ttl;
642
643         nf_reset(skb);
644
            /* IPTUNNEL_XMIT() is a macro defined outside this file.
               NOTE(review): it presumably relies on the local names used
               here (skb, iph, rt, stats) - confirm before renaming. */
645         IPTUNNEL_XMIT();
646         tunnel->recursion--;
647         return 0;
648
649 tx_error_icmp:
650         dst_link_failure(skb);
651 tx_error:
652         stats->tx_errors++;
653         dev_kfree_skb(skb);
654         tunnel->recursion--;
655         return 0;
656 }
657
/*
 * do_ioctl callback: query, create, change or delete IPIP tunnels.
 * Parameters are passed as a struct ip_tunnel_parm at
 * ifr->ifr_ifru.ifru_data.
 *
 * SIOCGETTUNNEL  On the fallback device, look up the tunnel described by
 *                the user's parameters; otherwise, or if none is found,
 *                report the parameters of @dev itself.
 * SIOCADDTUNNEL  Needs CAP_NET_ADMIN.  Validate the header template, then
 * SIOCCHGTUNNEL  find the tunnel (ADD also creates it).  CHG issued on a
 *                non-fallback device re-hashes that device's tunnel under
 *                the new addresses, unless those addresses already belong
 *                to another device (-EEXIST).  CHG then updates ttl, tos
 *                and frag_off.  The resulting parameters are copied back.
 * SIOCDELTUNNEL  Needs CAP_NET_ADMIN.  On the fallback device the victim
 *                is looked up from the user's parameters and the fallback
 *                device itself cannot be deleted (-EPERM); otherwise @dev
 *                is unregistered.
 *
 * Returns 0 on success or a negative errno: -EFAULT, -EPERM, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT, or the result of unregister_netdevice().
 */
658 static int
659 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
660 {
661         int err = 0;
662         struct ip_tunnel_parm p;
663         struct ip_tunnel *t;
664
665         switch (cmd) {
666         case SIOCGETTUNNEL:
667                 t = NULL;
668                 if (dev == ipip_fb_tunnel_dev) {
669                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
670                                 err = -EFAULT;
671                                 break;
672                         }
673                         t = ipip_tunnel_locate(&p, 0);
674                 }
                    /* Fall back to the device the ioctl was issued on. */
675                 if (t == NULL)
676                         t = (struct ip_tunnel*)dev->priv;
677                 memcpy(&p, &t->parms, sizeof(p));
678                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
679                         err = -EFAULT;
680                 break;
681
682         case SIOCADDTUNNEL:
683         case SIOCCHGTUNNEL:
                    /* err is preset before each check so that "goto done"
                       returns the matching error code. */
684                 err = -EPERM;
685                 if (!capable(CAP_NET_ADMIN))
686                         goto done;
687
688                 err = -EFAULT;
689                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
690                         goto done;
691
                    /* Only a plain IPv4/IPIP header template with no
                       frag_off bits other than DF is accepted. */
692                 err = -EINVAL;
693                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
694                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
695                         goto done;
                    /* A fixed TTL forces DF on the outer header. */
696                 if (p.iph.ttl)
697                         p.iph.frag_off |= htons(IP_DF);
698
699                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
700
701                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
702                         if (t != NULL) {
                                    /* The requested addresses are already
                                       used by a different device. */
703                                 if (t->dev != dev) {
704                                         err = -EEXIST;
705                                         break;
706                                 }
707                         } else {
                                    /* A point-to-point device must keep a
                                       remote address and a non-p2p device
                                       must not gain one. */
708                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
709                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
710                                         err = -EINVAL;
711                                         break;
712                                 }
                                    /* The bucket depends on the addresses,
                                       so unlink, update, then link again. */
713                                 t = (struct ip_tunnel*)dev->priv;
714                                 ipip_tunnel_unlink(t);
715                                 t->parms.iph.saddr = p.iph.saddr;
716                                 t->parms.iph.daddr = p.iph.daddr;
717                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
718                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
719                                 ipip_tunnel_link(t);
720                                 netdev_state_change(dev);
721                         }
722                 }
723
724                 if (t) {
725                         err = 0;
726                         if (cmd == SIOCCHGTUNNEL) {
727                                 t->parms.iph.ttl = p.iph.ttl;
728                                 t->parms.iph.tos = p.iph.tos;
729                                 t->parms.iph.frag_off = p.iph.frag_off;
730                         }
731                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
732                                 err = -EFAULT;
733                 } else
734                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
735                 break;
736
737         case SIOCDELTUNNEL:
738                 err = -EPERM;
739                 if (!capable(CAP_NET_ADMIN))
740                         goto done;
741
                    /* Through the fallback device the caller names the
                       tunnel to delete via the user-supplied parameters. */
742                 if (dev == ipip_fb_tunnel_dev) {
743                         err = -EFAULT;
744                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
745                                 goto done;
746                         err = -ENOENT;
747                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
748                                 goto done;
749                         err = -EPERM;
750                         if (t->dev == ipip_fb_tunnel_dev)
751                                 goto done;
752                         dev = t->dev;
753                 }
754                 err = unregister_netdevice(dev);
755                 break;
756
757         default:
758                 err = -EINVAL;
759         }
760
761 done:
762         return err;
763 }
764
765 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
766 {
767         return &(((struct ip_tunnel*)dev->priv)->stat);
768 }
769
770 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
771 {
772         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
773                 return -EINVAL;
774         dev->mtu = new_mtu;
775         return 0;
776 }
777
778 static void ipip_tunnel_setup(struct net_device *dev)
779 {
780         SET_MODULE_OWNER(dev);
781         dev->uninit             = ipip_tunnel_uninit;
782         dev->hard_start_xmit    = ipip_tunnel_xmit;
783         dev->get_stats          = ipip_tunnel_get_stats;
784         dev->do_ioctl           = ipip_tunnel_ioctl;
785         dev->change_mtu         = ipip_tunnel_change_mtu;
786         dev->destructor         = free_netdev;
787
788         dev->type               = ARPHRD_TUNNEL;
789         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
790         dev->mtu                = 1500 - sizeof(struct iphdr);
791         dev->flags              = IFF_NOARP;
792         dev->iflink             = 0;
793         dev->addr_len           = 4;
794 }
795
796 static int ipip_tunnel_init(struct net_device *dev)
797 {
798         struct net_device *tdev = NULL;
799         struct ip_tunnel *tunnel;
800         struct iphdr *iph;
801
802         tunnel = (struct ip_tunnel*)dev->priv;
803         iph = &tunnel->parms.iph;
804
805         tunnel->dev = dev;
806         strcpy(tunnel->parms.name, dev->name);
807
808         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
809         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
810
811         if (iph->daddr) {
812                 struct flowi fl = { .oif = tunnel->parms.link,
813                                     .nl_u = { .ip4_u =
814                                               { .daddr = iph->daddr,
815                                                 .saddr = iph->saddr,
816                                                 .tos = RT_TOS(iph->tos) } },
817                                     .proto = IPPROTO_IPIP };
818                 struct rtable *rt;
819                 if (!ip_route_output_key(&rt, &fl)) {
820                         tdev = rt->u.dst.dev;
821                         ip_rt_put(rt);
822                 }
823                 dev->flags |= IFF_POINTOPOINT;
824         }
825
826         if (!tdev && tunnel->parms.link)
827                 tdev = __dev_get_by_index(tunnel->parms.link);
828
829         if (tdev) {
830                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
831                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
832         }
833         dev->iflink = tunnel->parms.link;
834
835         return 0;
836 }
837
838 static int __init ipip_fb_tunnel_init(struct net_device *dev)
839 {
840         struct ip_tunnel *tunnel = dev->priv;
841         struct iphdr *iph = &tunnel->parms.iph;
842
843         tunnel->dev = dev;
844         strcpy(tunnel->parms.name, dev->name);
845
846         iph->version            = 4;
847         iph->protocol           = IPPROTO_IPIP;
848         iph->ihl                = 5;
849
850         dev_hold(dev);
851         tunnels_wc[0]           = tunnel;
852         return 0;
853 }
854
855 static struct xfrm_tunnel ipip_handler = {
856         .handler        =       ipip_rcv,
857         .err_handler    =       ipip_err,
858 };
859
860 static char banner[] __initdata =
861         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
862
863 static int __init ipip_init(void)
864 {
865         int err;
866
867         printk(banner);
868
869         if (xfrm4_tunnel_register(&ipip_handler) < 0) {
870                 printk(KERN_INFO "ipip init: can't register tunnel\n");
871                 return -EAGAIN;
872         }
873
874         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
875                                            "tunl0",
876                                            ipip_tunnel_setup);
877         if (!ipip_fb_tunnel_dev) {
878                 err = -ENOMEM;
879                 goto err1;
880         }
881
882         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
883
884         if ((err = register_netdev(ipip_fb_tunnel_dev)))
885                 goto err2;
886  out:
887         return err;
888  err2:
889         free_netdev(ipip_fb_tunnel_dev);
890  err1:
891         xfrm4_tunnel_deregister(&ipip_handler);
892         goto out;
893 }
894
895 static void __exit ipip_fini(void)
896 {
897         if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
898                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
899
900         unregister_netdev(ipip_fb_tunnel_dev);
901 }
902
903 module_init(ipip_init);
904 module_exit(ipip_fini);
905 MODULE_LICENSE("GPL");