/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	:	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *		David Woodhouse	:	Perform some basic ICMP handling.
 *					IPIP Routing without decapsulation.
 *		Carlos Picoto	:	GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name, not "tunnel:", when reporting errors.
		Added tx_dropped stat.

		-Alan Cox	(Alan.Cox@linux.org) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note:  There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/
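
/* A minimal sketch of that sequence (illustrative only, it is not compiled
   as part of this driver; payload_len stands for whatever payload size the
   caller needs):

	struct sk_buff *skb = alloc_skb(LL_MAX_HEADER + payload_len, GFP_ATOMIC);

	if (skb != NULL) {
		skb_reserve(skb, LL_MAX_HEADER);
		memset(skb_put(skb, payload_len), 0, payload_len);
		skb_push(skb, sizeof(struct iphdr));
	}

   The headroom is reserved before the first skb_put(); afterwards
   skb_tailroom() and skb_headroom() report how much room remains for
   further skb_put() and skb_push() calls respectively.
 */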

/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c.

   For comments look at net/ipv4/ip_gre.c --ANK
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#define HASH_SIZE  16
#define HASH(addr) ((addr^(addr>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

static struct net_device *ipip_fb_tunnel_dev;

static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

static rwlock_t ipip_lock = RW_LOCK_UNLOCKED;

/* Find the tunnel matching the outer addresses: fully specified
   (remote+local) first, then remote only, then local only, and finally
   the wildcard fallback tunnel. */
static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_r[h0]; t; t = t->next) {
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
	}
	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}

static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
	u32 remote = t->parms.iph.daddr;
	u32 local = t->parms.iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &tunnels[prio][h];
}


static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}

static void ipip_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipip_lock);
	*tp = t;
	write_unlock_bh(&ipip_lock);
}

static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = dev->priv;
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;
	dev->destructor = free_netdev;
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	dev_hold(dev);
	ipip_tunnel_link(nt);
	/* Do not decrement MOD_USE_COUNT here. */
	return nt;

failed:
	return NULL;
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
	if (dev == ipip_fb_tunnel_dev) {
		write_lock_bh(&ipip_lock);
		tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip_lock);
	} else
		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
	dev_put(dev);
}

static void ipip_err(struct sk_buff *skb, void *__unused)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipip_lock);
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return;
#else
	/* NOTE: this branch is compiled out (I_WISH_WORLD_WERE_PERFECT is
	   never defined) and is kept only as a reference. */
	unsigned char *dp = skb->data;
	int len = skb->len;
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE the
		   encapsulated packet. Well, he is a fool, but what can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare a fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess the incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route the "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_pmtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return;
#endif
}

static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
{
	struct iphdr *inner_iph = skb->nh.iph;

	if (INET_ECN_is_ce(outer_iph->tos) &&
	    INET_ECN_is_not_ce(inner_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;

	iph = skb->nh.iph;

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
#ifdef CONFIG_NETFILTER
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
		skb->nf_debug = 0;
#endif
#endif
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

out:
	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	u16    df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos&1)		/* "inherit" flag: copy TOS from the inner header */
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (tiph->frag_off)
		mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipip_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = (struct ip_tunnel*)dev->priv;
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = (struct ip_tunnel*)dev->priv;
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
	return &(((struct ip_tunnel*)dev->priv)->stat);
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= 1500 - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}

static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = (struct ip_tunnel*)dev->priv;
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}

static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = dev->priv;
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}

static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
};

static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (xfrm4_tunnel_register(&ipip_handler) < 0) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		/* Nothing to free yet; just undo the handler registration. */
		err = -ENOMEM;
		xfrm4_tunnel_deregister(&ipip_handler);
		goto out;
	}

	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto fail;
 out:
	return err;
 fail:
	xfrm4_tunnel_deregister(&ipip_handler);
	free_netdev(ipip_fb_tunnel_dev);
	goto out;
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	unregister_netdev(ipip_fb_tunnel_dev);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");