1 diff -Nurb linux-2.6.22-592/drivers/net/Makefile linux-2.6.22-593/drivers/net/Makefile
2 --- linux-2.6.22-592/drivers/net/Makefile 2008-03-15 10:50:00.000000000 -0400
3 +++ linux-2.6.22-593/drivers/net/Makefile 2008-03-15 10:51:27.000000000 -0400
5 # Makefile for the Linux network (ethercard) device drivers.
9 obj-y +=ztun.o shortbridge.o
10 obj-$(CONFIG_E1000) += e1000/
11 obj-$(CONFIG_E1000E) += e1000e/
12 diff -Nurb linux-2.6.22-592/drivers/net/gre.c linux-2.6.22-593/drivers/net/gre.c
13 --- linux-2.6.22-592/drivers/net/gre.c 1969-12-31 19:00:00.000000000 -0500
14 +++ linux-2.6.22-593/drivers/net/gre.c 2008-03-15 10:51:27.000000000 -0400
17 + * Linux NET3: GRE over IP protocol decoder.
19 + * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
21 + * This program is free software; you can redistribute it and/or
22 + * modify it under the terms of the GNU General Public License
23 + * as published by the Free Software Foundation; either version
24 + * 2 of the License, or (at your option) any later version.
28 +#include <linux/capability.h>
29 +#include <linux/module.h>
30 +#include <linux/types.h>
31 +#include <linux/sched.h>
32 +#include <linux/kernel.h>
33 +#include <asm/uaccess.h>
34 +#include <linux/skbuff.h>
35 +#include <linux/netdevice.h>
36 +#include <linux/in.h>
37 +#include <linux/tcp.h>
38 +#include <linux/udp.h>
39 +#include <linux/if_arp.h>
40 +#include <linux/mroute.h>
41 +#include <linux/init.h>
42 +#include <linux/in6.h>
43 +#include <linux/inetdevice.h>
44 +#include <linux/etherdevice.h> /**XXX added XXX */
45 +#include <linux/igmp.h>
46 +#include <linux/netfilter_ipv4.h>
47 +#include <linux/if_ether.h>
49 +#include <net/sock.h>
51 +#include <net/icmp.h>
52 +#include <net/protocol.h>
53 +#include <net/ipip.h>
55 +#include <net/checksum.h>
56 +#include <net/dsfield.h>
57 +#include <net/inet_ecn.h>
58 +#include <net/xfrm.h>
61 +#include <net/ipv6.h>
62 +#include <net/ip6_fib.h>
63 +#include <net/ip6_route.h>
66 +//#define GRE_DEBUG 1
69 + Problems & solutions
70 + --------------------
72 + 1. The most important issue is detecting local dead loops.
73 + They would cause complete host lockup in transmit, which
74 + would be "resolved" by stack overflow or, if queueing is enabled,
75 + with infinite looping in net_bh.
77 + We cannot track such dead loops during route installation,
78 + it is an infeasible task. The most general solution would be
79 + to keep skb->encapsulation counter (sort of local ttl),
80 + and silently drop packet when it expires. It is the best
81 + solution, but it supposes maintaining a new variable in ALL
82 + skb, even if no tunneling is used.
84 + Current solution: t->recursion lock breaks dead loops. It looks
85 + like dev->tbusy flag, but I preferred new variable, because
86 + the semantics is different. One day, when hard_start_xmit
87 + will be multithreaded we will have to use skb->encapsulation.
91 + 2. Networking dead loops would not kill routers, but would really
92 + kill network. IP hop limit plays role of "t->recursion" in this case,
93 + if we copy it from packet being encapsulated to upper header.
94 + It is very good solution, but it introduces two problems:
96 + - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
97 + do not work over tunnels.
98 + - traceroute does not work. I planned to relay ICMP from tunnel,
99 + so that this problem would be solved and traceroute output
100 + would be even more informative. This idea appeared to be wrong:
101 + only Linux complies to rfc1812 now (yes, guys, Linux is the only
102 + true router now :-)), all routers (at least, in neighbourhood of mine)
103 + return only 8 bytes of payload. It is the end.
105 + Hence, if we want that OSPF worked or traceroute said something reasonable,
106 + we should search for another solution.
108 + One of them is to parse packet trying to detect inner encapsulation
109 + made by our node. It is difficult or even impossible, especially,
110 + taking into account fragmentation. To be short, it is not a solution at all.
112 + Current solution: The solution was UNEXPECTEDLY SIMPLE.
113 + We force DF flag on tunnels with preconfigured hop limit,
114 + that is ALL. :-) Well, it does not remove the problem completely,
115 + but exponential growth of network traffic is changed to linear
116 + (branches, that exceed pmtu are pruned) and tunnel mtu
117 + quickly degrades to a value <68, where looping stops.
118 + Yes, it is not good if there exists a router in the loop,
119 + which does not force DF, even when encapsulating packets have DF set.
120 + But it is not our problem! Nobody could accuse us, we made
121 + all that we could make. Even if it is your gated who injected
122 + fatal route to network, even if it were you who configured
123 + fatal static route: you are innocent. :-)
127 + 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
128 + practically identical code. It would be good to glue them
129 + together, but it is not very evident, how to make them modular.
130 + sit is integral part of IPv6, ipip and gre are naturally modular.
131 + We could extract common parts (hash table, ioctl etc)
132 + to a separate module (ip_tunnel.c).
137 +static int ipgre_tunnel_init(struct net_device *dev);
138 +static void ipgre_ip_tunnel_setup(struct net_device *dev);
139 +static void ipgre_eth_tunnel_setup(struct net_device *dev);
141 +/* Fallback tunnel: no source, no destination, no key, no options */
143 +static int ipgre_fb_tunnel_init(struct net_device *dev);
145 +static struct net_device *ipgre_fb_tunnel_dev;
147 +/* Tunnel hash table */
157 + We require exact key match i.e. if a key is present in packet
158 + it will match only tunnel with the same key; if it is not present,
159 + it will match only keyless tunnel.
161 + All keyless packets, if not matching configured keyless tunnels,
162 + will match fallback tunnel.
165 +#define HASH_SIZE 1024
166 +#define HASH(addr) (ntohl(addr)&1023)
168 +static struct ip_tunnel *tunnels[4][HASH_SIZE];
170 +#define tunnels_r_l (tunnels[3])
171 +#define tunnels_r (tunnels[2])
172 +#define tunnels_l (tunnels[1])
173 +#define tunnels_wc (tunnels[0])
175 +static DEFINE_RWLOCK(ipgre_lock);
177 +/* Given src, dst and key, find appropriate for input tunnel. */
179 +static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
182 + unsigned hash_value = HASH(key);
183 + struct ip_tunnel *t;
185 + t = tunnels_r_l[hash_value];
187 + if (t && (t->parms.i_key == key) && (t->dev->flags&IFF_UP)) {
191 + t = tunnels_r[hash_value];
192 + if (t && (t->parms.i_key == key) && (t->dev->flags&IFF_UP))
195 + t = tunnels_l[hash_value];
196 + if (t && (t->parms.i_key == key) && (t->dev->flags&IFF_UP))
198 + t = tunnels_wc[hash_value];
199 + if (t && (t->parms.i_key == key) && (t->dev->flags&IFF_UP))
201 + if (ipgre_fb_tunnel_dev->flags&IFF_UP)
202 + return netdev_priv(ipgre_fb_tunnel_dev);
206 +static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
208 + __be32 remote = t->parms.iph.daddr;
209 + __be32 local = t->parms.iph.saddr;
210 + __be32 key = t->parms.i_key;
211 + unsigned h = HASH(key);
216 + if (remote && !MULTICAST(remote)) {
218 + //h ^= HASH(remote);
221 + return &tunnels[prio][h];
224 +static void ipgre_tunnel_link(struct ip_tunnel *t)
226 + struct ip_tunnel **tp = ipgre_bucket(t);
229 + write_lock_bh(&ipgre_lock);
231 + write_unlock_bh(&ipgre_lock);
234 +static void ipgre_tunnel_unlink(struct ip_tunnel *t)
236 + struct ip_tunnel **tp;
238 + for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
240 + write_lock_bh(&ipgre_lock);
242 + write_unlock_bh(&ipgre_lock);
248 +static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
250 + __be32 remote = parms->iph.daddr;
251 + __be32 local = parms->iph.saddr;
252 + __be32 key = parms->i_key;
253 + __be16 proto = parms->proto_type;
254 + struct ip_tunnel *t, **tp, *nt;
255 + struct net_device *dev;
256 + unsigned h = HASH(key);
258 + char name[IFNAMSIZ];
262 + if (remote && !MULTICAST(remote)) {
264 + //h ^= HASH(remote);
266 + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
267 + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
268 + if (key == t->parms.i_key)
275 + printk(KERN_CRIT "Adding tunnel %s with key %d\n", parms->name, ntohl(key));
277 + if (parms->name[0])
278 + strlcpy(name, parms->name, IFNAMSIZ);
281 + for (i=1; i<100; i++) {
282 + sprintf(name, "gre%d", i);
283 + if (__dev_get_by_name(&init_net, name) == NULL)
290 + /* Tunnel creation: check payload type and call appropriate
295 + dev = alloc_netdev(sizeof(*t), name, ipgre_ip_tunnel_setup);
298 + dev = alloc_netdev(sizeof(*t), name, ipgre_eth_tunnel_setup);
307 + dev->init = ipgre_tunnel_init;
308 + nt = netdev_priv(dev);
309 + nt->parms = *parms;
311 + if (register_netdevice(dev) < 0) {
317 + ipgre_tunnel_link(nt);
324 +static void ipgre_tunnel_uninit(struct net_device *dev)
326 + ipgre_tunnel_unlink(netdev_priv(dev));
331 +static void ipgre_err(struct sk_buff *skb, u32 info)
333 +#ifndef I_WISH_WORLD_WERE_PERFECT
335 +/* It is not :-( All the routers (except for Linux) return only
336 + 8 bytes of packet payload. It means, that precise relaying of
337 + ICMP in the real Internet is absolutely infeasible.
339 + Moreover, Cisco "wise men" put GRE key to the third word
340 + in GRE header. It makes impossible maintaining even soft state for keyed
341 + GRE tunnels with enabled checksum. Tell them "thank you".
343 + Well, I wonder, rfc1812 was written by Cisco employee,
344 + what the hell these idiots break standards established
348 + struct iphdr *iph = (struct iphdr*)skb->data;
349 + __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
350 + int grehlen = (iph->ihl<<2) + 4;
351 + int type = icmp_hdr(skb)->type;
352 + int code = icmp_hdr(skb)->code;
353 + struct ip_tunnel *t;
357 + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
358 + if (flags&(GRE_VERSION|GRE_ROUTING))
360 + if (flags&GRE_KEY) {
362 + if (flags&GRE_CSUM)
367 + /* If only 8 bytes returned, keyed message will be dropped here */
368 + if (skb_headlen(skb) < grehlen)
373 + case ICMP_PARAMETERPROB:
376 + case ICMP_DEST_UNREACH:
378 + case ICMP_SR_FAILED:
379 + case ICMP_PORT_UNREACH:
380 + /* Impossible event. */
382 + case ICMP_FRAG_NEEDED:
383 + /* Soft state for pmtu is maintained by IP core. */
386 + /* All others are translated to HOST_UNREACH.
387 + rfc2003 contains "deep thoughts" about NET_UNREACH,
388 + I believe they are just ether pollution. --ANK
393 + case ICMP_TIME_EXCEEDED:
394 + if (code != ICMP_EXC_TTL)
399 + read_lock(&ipgre_lock);
400 + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
401 + if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
404 + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
407 + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
411 + t->err_time = jiffies;
413 + read_unlock(&ipgre_lock);
416 + struct iphdr *iph = (struct iphdr*)dp;
417 + struct iphdr *eiph;
418 + __be16 *p = (__be16*)(dp+(iph->ihl<<2));
419 + int type = skb->h.icmph->type;
420 + int code = skb->h.icmph->code;
423 + __be32 rel_info = 0;
426 + int grehlen = (iph->ihl<<2) + 4;
427 + struct sk_buff *skb2;
431 + if (skb->dev->nd_net != &init_net)
434 + if (p[1] != htons(ETH_P_IP))
438 + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
439 + if (flags&(GRE_VERSION|GRE_ROUTING))
441 + if (flags&GRE_CSUM)
448 + if (len < grehlen + sizeof(struct iphdr))
450 + eiph = (struct iphdr*)(dp + grehlen);
455 + case ICMP_PARAMETERPROB:
456 + n = ntohl(skb->h.icmph->un.gateway) >> 24;
457 + if (n < (iph->ihl<<2))
460 + /* So... This guy found something strange INSIDE encapsulated
461 + packet. Well, he is fool, but what can we do ?
463 + rel_type = ICMP_PARAMETERPROB;
465 + rel_info = htonl(n << 24);
468 + case ICMP_DEST_UNREACH:
470 + case ICMP_SR_FAILED:
471 + case ICMP_PORT_UNREACH:
472 + /* Impossible event. */
474 + case ICMP_FRAG_NEEDED:
475 + /* And it is the only really necessary thing :-) */
476 + n = ntohs(skb->h.icmph->un.frag.mtu);
477 + if (n < grehlen+68)
480 + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
481 + if (n > ntohs(eiph->tot_len))
483 + rel_info = htonl(n);
486 + /* All others are translated to HOST_UNREACH.
487 + rfc2003 contains "deep thoughts" about NET_UNREACH,
488 + I believe, it is just ether pollution. --ANK
490 + rel_type = ICMP_DEST_UNREACH;
491 + rel_code = ICMP_HOST_UNREACH;
495 + case ICMP_TIME_EXCEEDED:
496 + if (code != ICMP_EXC_TTL)
501 + /* Prepare fake skb to feed it to icmp_send */
502 + skb2 = skb_clone(skb, GFP_ATOMIC);
505 + dst_release(skb2->dst);
507 + skb_pull(skb2, skb->data - (u8*)eiph);
508 + skb_reset_network_header(skb2);
510 + /* Try to guess incoming interface */
511 + memset(&fl, 0, sizeof(fl));
512 + fl.fl_net = &init_net;
513 + fl.fl4_dst = eiph->saddr;
514 + fl.fl4_tos = RT_TOS(eiph->tos);
515 + fl.proto = IPPROTO_GRE;
516 + if (ip_route_output_key(&rt, &fl)) {
520 + skb2->dev = rt->u.dst.dev;
522 + /* route "incoming" packet */
523 + if (rt->rt_flags&RTCF_LOCAL) {
526 + fl.fl4_dst = eiph->daddr;
527 + fl.fl4_src = eiph->saddr;
528 + fl.fl4_tos = eiph->tos;
529 + if (ip_route_output_key(&rt, &fl) ||
530 + rt->u.dst.dev->type != ARPHRD_IPGRE) {
537 + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
538 + skb2->dst->dev->type != ARPHRD_IPGRE) {
544 + /* change mtu on this route */
545 + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
546 + if (n > dst_mtu(skb2->dst)) {
550 + skb2->dst->ops->update_pmtu(skb2->dst, n);
551 + } else if (type == ICMP_TIME_EXCEEDED) {
552 + struct ip_tunnel *t = netdev_priv(skb2->dev);
553 + if (t->parms.iph.ttl) {
554 + rel_type = ICMP_DEST_UNREACH;
555 + rel_code = ICMP_HOST_UNREACH;
559 + icmp_send(skb2, rel_type, rel_code, rel_info);
564 +static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
566 + if (INET_ECN_is_ce(iph->tos)) {
567 + if (skb->protocol == htons(ETH_P_IP)) {
568 + IP_ECN_set_ce(ip_hdr(skb));
569 + } else if (skb->protocol == htons(ETH_P_IPV6)) {
570 + IP6_ECN_set_ce(ipv6_hdr(skb));
576 +ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
579 + if (skb->protocol == htons(ETH_P_IP))
580 + inner = old_iph->tos;
581 + else if (skb->protocol == htons(ETH_P_IPV6))
582 + inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
583 + return INET_ECN_encapsulate(tos, inner);
586 +static int ipgre_rcv(struct sk_buff *skb)
594 + struct ip_tunnel *tunnel;
598 + if (skb->dev->nd_net != &init_net) {
602 + if (!pskb_may_pull(skb, 16))
607 + flags = *(__be16*)h;
610 + printk(KERN_DEBUG "gre.c [601] src:%x dst:%x proto:%d %x", iph->saddr, iph->daddr, iph->protocol, skb->data);
612 + proto = ntohs(*(__be16*)(h+2)); /* XXX added XXX */
614 + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
615 + /* - Version must be 0.
616 + - We do not support routing headers.
618 + if (flags&(GRE_VERSION|GRE_ROUTING))
621 + if (flags&GRE_CSUM) {
622 + switch (skb->ip_summed) {
623 + case CHECKSUM_COMPLETE:
624 + csum = csum_fold(skb->csum);
628 + case CHECKSUM_NONE:
630 + csum = __skb_checksum_complete(skb);
631 + skb->ip_summed = CHECKSUM_COMPLETE;
635 + if (flags&GRE_KEY) {
636 + key = *(__be32*)(h + offset);
639 + if (flags&GRE_SEQ) {
640 + seqno = ntohl(*(__be32*)(h + offset));
645 + read_lock(&ipgre_lock);
646 + if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
647 + secpath_reset(skb);
649 + skb->protocol = *(__be16*)(h + 2);
650 + /* WCCP version 1 and 2 protocol decoding.
651 + * - Change protocol to IP
652 + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
655 + skb->protocol == htons(ETH_P_WCCP)) {
656 + skb->protocol = htons(ETH_P_IP);
657 + if ((*(h + offset) & 0xF0) != 0x40)
661 + //skb->mac.raw = skb->nh.raw;
662 + skb_reset_mac_header(skb);
663 + __pskb_pull(skb, offset);
664 + skb_reset_network_header(skb);
665 + skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
666 + if(proto == ETH_P_ETH)
669 + unsigned char* tmp_hdr = skb->data;
670 + printk(KERN_DEBUG "gre.c [658] %x %x %x %x %x %x\tskb %x\n", tmp_hdr[0], tmp_hdr[1], tmp_hdr[2], tmp_hdr[3], tmp_hdr[4], tmp_hdr[5], skb->data);
672 + skb->protocol = eth_type_trans(skb, tunnel->dev);
674 + /* XXX added these lines to make arp work? XXX */
675 + /*skb->mac.raw = skb->data;*/
676 + skb->network_header = skb->network_header + ETH_HLEN;
677 + /* XXX added these lines to make arp work? XXX */
680 + tmp_hdr = skb->data;
681 + printk(KERN_DEBUG "gre.c [669] %x %x %x %x %x %x\tskb %x\n", tmp_hdr[0], tmp_hdr[1], tmp_hdr[2], tmp_hdr[3], tmp_hdr[4], tmp_hdr[5], skb->data);
682 + printk(KERN_ALERT "gre.c [671] received ethernet on gre %x %x\n",skb->protocol, ((skb->nh).iph)->protocol);
684 + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
687 + skb->pkt_type = PACKET_HOST;
688 +#ifdef CONFIG_NET_IPGRE_BROADCAST
689 + if (MULTICAST(iph->daddr)) {
690 + /* Looped back packet, drop it! */
691 + if (((struct rtable*)skb->dst)->fl.iif == 0)
693 + tunnel->stat.multicast++;
694 + skb->pkt_type = PACKET_BROADCAST;
698 + if (((flags&GRE_CSUM) && csum) ||
699 + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
700 + tunnel->stat.rx_crc_errors++;
701 + tunnel->stat.rx_errors++;
704 + if (tunnel->parms.i_flags&GRE_SEQ) {
705 + if (!(flags&GRE_SEQ) ||
706 + (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
707 + tunnel->stat.rx_fifo_errors++;
708 + tunnel->stat.rx_errors++;
711 + tunnel->i_seqno = seqno + 1;
713 + tunnel->stat.rx_packets++;
714 + tunnel->stat.rx_bytes += skb->len;
715 + skb->dev = tunnel->dev;
716 + dst_release(skb->dst);
719 + ipgre_ecn_decapsulate(iph, skb);
721 + read_unlock(&ipgre_lock);
724 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
727 + read_unlock(&ipgre_lock);
733 +static int ipgre_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
735 + struct ip_tunnel *tunnel = netdev_priv(dev);
736 + struct net_device_stats *stats = &tunnel->stat;
737 + struct iphdr *old_iph = ip_hdr(skb);
738 + struct iphdr *tiph;
741 + struct rtable *rt; /* Route to the other host */
742 + struct net_device *tdev; /* Device to other host */
743 + struct iphdr *iph; /* Our new IP header */
744 + int max_headroom; /* The extra header space needed */
749 + if (tunnel->recursion++) {
750 + tunnel->stat.collisions++;
754 + if (dev->hard_header) {
756 + tiph = (struct iphdr*)skb->data;
758 + gre_hlen = tunnel->hlen;
759 + tiph = &tunnel->parms.iph;
762 + if ((dst = tiph->daddr) == 0) {
765 + if (skb->dst == NULL) {
766 + tunnel->stat.tx_fifo_errors++;
770 + if (skb->protocol == htons(ETH_P_IP)) {
771 + rt = (struct rtable*)skb->dst;
772 + if ((dst = rt->rt_gateway) == 0)
773 + goto tx_error_icmp;
776 + else if (skb->protocol == htons(ETH_P_IPV6)) {
777 + struct in6_addr *addr6;
779 + struct neighbour *neigh = skb->dst->neighbour;
784 + addr6 = (struct in6_addr*)&neigh->primary_key;
785 + addr_type = ipv6_addr_type(addr6);
787 + if (addr_type == IPV6_ADDR_ANY) {
788 + addr6 = &ipv6_hdr(skb)->daddr;
789 + addr_type = ipv6_addr_type(addr6);
792 + if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
793 + goto tx_error_icmp;
803 + if (skb->protocol == htons(ETH_P_IP))
804 + tos = old_iph->tos;
809 + struct flowi fl = { .fl_net = &init_net,
810 + .oif = tunnel->parms.link,
813 + .saddr = tiph->saddr,
814 + .tos = RT_TOS(tos) } },
815 + .proto = IPPROTO_GRE };
816 + if (ip_route_output_key(&rt, &fl)) {
817 + tunnel->stat.tx_carrier_errors++;
821 + tdev = rt->u.dst.dev;
826 + tunnel->stat.collisions++;
830 + df = tiph->frag_off;
832 + mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
834 + mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
837 + skb->dst->ops->update_pmtu(skb->dst, mtu);
839 + if (skb->protocol == htons(ETH_P_IP)) {
840 + df |= (old_iph->frag_off&htons(IP_DF));
842 + if ((old_iph->frag_off&htons(IP_DF)) &&
843 + mtu < ntohs(old_iph->tot_len)) {
844 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
850 + else if (skb->protocol == htons(ETH_P_IPV6)) {
851 + struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
853 + if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
854 + if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
855 + rt6->rt6i_dst.plen == 128) {
856 + rt6->rt6i_flags |= RTF_MODIFIED;
857 + skb->dst->metrics[RTAX_MTU-1] = mtu;
861 + if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
862 + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
869 + if (tunnel->err_count > 0) {
870 + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
871 + tunnel->err_count--;
873 + dst_link_failure(skb);
875 + tunnel->err_count = 0;
878 + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
880 + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
881 + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
884 + stats->tx_dropped++;
885 + dev_kfree_skb(skb);
886 + tunnel->recursion--;
890 + skb_set_owner_w(new_skb, skb->sk);
891 + dev_kfree_skb(skb);
893 + old_iph = ip_hdr(skb);
896 + skb->transport_header = skb->network_header;
897 + skb_push(skb, gre_hlen);
898 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
899 + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
901 + dst_release(skb->dst);
902 + skb->dst = &rt->u.dst;
905 + * Push down and install the IPIP header.
910 + iph->ihl = sizeof(struct iphdr) >> 2;
911 + iph->frag_off = df;
912 + iph->protocol = IPPROTO_GRE;
913 + iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
914 + iph->daddr = rt->rt_dst;
915 + iph->saddr = rt->rt_src;
917 + if ((iph->ttl = tiph->ttl) == 0) {
918 + if (skb->protocol == htons(ETH_P_IP))
919 + iph->ttl = old_iph->ttl;
921 + else if (skb->protocol == htons(ETH_P_IPV6))
922 + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
925 + iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
928 + ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
929 + ((__be16*)(iph+1))[1] = skb->protocol;
931 + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
932 + __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
934 + if (tunnel->parms.o_flags&GRE_SEQ) {
936 + *ptr = htonl(tunnel->o_seqno);
939 + if (tunnel->parms.o_flags&GRE_KEY) {
940 + *ptr = tunnel->parms.o_key;
943 + if (tunnel->parms.o_flags&GRE_CSUM) {
945 + *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
952 + tunnel->recursion--;
956 + dst_link_failure(skb);
959 + stats->tx_errors++;
960 + dev_kfree_skb(skb);
961 + tunnel->recursion--;
965 +static int ipgre_eth_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
967 + struct ip_tunnel *tunnel = netdev_priv(dev);
968 + struct net_device_stats *stats = &tunnel->stat;
969 + struct iphdr *old_iph = ip_hdr(skb);
970 + struct iphdr *tiph = &tunnel->parms.iph;
973 + struct rtable *rt; /* Route to the other host */
974 + struct net_device *tdev; /* Device to other host */
975 + int gre_hlen = tunnel->hlen; /* XXX changed XXX*/
976 + //struct etheriphdr *ethiph;
977 + struct iphdr *iph; /* Our new IP header */
978 + int max_headroom; /* The extra header space needed */
982 + printk(KERN_ALERT "gre.c:972 Starting xmit\n");
985 + if (tunnel->recursion++) {
986 + stats->collisions++;
990 + /* Need valid non-multicast daddr. */
991 + if (tiph->daddr == 0 || MULTICAST(tiph->daddr))
996 + if (skb->protocol == htons(ETH_P_IP))
997 + tos = old_iph->tos;
1001 + printk(KERN_ALERT "gre.c:991 Passed tos assignment.\n");
1006 + struct flowi fl = { .fl_net = &init_net,
1007 + .oif = tunnel->parms.link,
1008 + .nl_u = { .ip4_u =
1009 + { .daddr = tiph->daddr,
1010 + .saddr = tiph->saddr,
1011 + .tos = RT_TOS(tos) } },
1012 + .proto = IPPROTO_GRE };
1013 + if (ip_route_output_key(&rt, &fl)) {
1014 + stats->tx_carrier_errors++;
1015 + goto tx_error_icmp;
1018 + tdev = rt->u.dst.dev;
1020 + printk(KERN_ALERT "gre.c:1006 Passed the route retrieval\n");
1022 + if (tdev == dev) {
1024 + stats->collisions++;
1028 + printk(KERN_ALERT "gre.c:1018 Passed tdev collision check.\n");
1031 + /* Check MTU stuff if kernel panic */
1032 + df = tiph->frag_off;
1034 + mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
1036 + mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
1039 + skb->dst->ops->update_pmtu(skb->dst, mtu);
1042 + printk(KERN_ALERT "gre.c:1032 Passed the pmtu setting.\n");
1045 + if (skb->protocol == htons(ETH_P_IP)) {
1046 + df |= (old_iph->frag_off&htons(IP_DF));
1048 + if ((old_iph->frag_off & htons(IP_DF)) &&
1049 + mtu < ntohs(old_iph->tot_len)) {
1050 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1056 + else if (skb->protocol == htons(ETH_P_IPV6)) {
1057 + struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
1059 + if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
1060 + if (tiph->daddr || rt6->rt6i_dst.plen == 128) {
1061 + rt6->rt6i_flags |= RTF_MODIFIED;
1062 + skb->dst->metrics[RTAX_MTU-1] = mtu;
1066 + /* @@@ Is this correct? */
1067 + if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
1068 + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1075 + printk(KERN_ALERT "gre.c:1065 Passed the fragmentation check.\n");
1078 + if (tunnel->err_count > 0) {
1079 + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
1080 + tunnel->err_count--;
1081 + dst_link_failure(skb);
1083 + tunnel->err_count = 0;
1086 + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
1088 + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
1089 + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
1092 + stats->tx_dropped++;
1093 + dev_kfree_skb(skb);
1094 + tunnel->recursion--;
1098 + skb_set_owner_w(new_skb, skb->sk);
1099 + dev_kfree_skb(skb);
1101 + old_iph = ip_hdr(skb);
1104 + printk(KERN_ALERT "gre.c:1094 Passed the headroom calculation\n");
1108 + skb->transport_header = skb->mac_header; // Added by valas
1109 + skb_push(skb, gre_hlen);
1110 + skb_reset_network_header(skb);
1111 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1112 + dst_release(skb->dst);
1113 + skb->dst = &rt->u.dst;
1116 + * Push down and install the etherip header.
1119 + iph = ip_hdr(skb);
1121 + iph->ihl = sizeof(struct iphdr) >> 2;
1122 + iph->frag_off = df;
1123 + iph->protocol = IPPROTO_GRE;
1124 + iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
1125 + iph->daddr = rt->rt_dst;
1126 + iph->saddr = rt->rt_src;
1128 +/* ethiph->version = htons(ETHERIP_VERSION); */
1130 + printk(KERN_ALERT "gre.c:1121 Passed outer IP header construction.\n");
1133 + if ((iph->ttl = tiph->ttl) == 0) {
1134 + if (skb->protocol == htons(ETH_P_IP))
1135 + iph->ttl = old_iph->ttl;
1137 + else if (skb->protocol == htons(ETH_P_IPV6))
1138 + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
1141 + iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
1144 + printk(KERN_ALERT "gre.c:1006 Passed the TTL check.\n");
1147 + ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
1148 + ((__be16*)(iph+1))[1] = htons(tunnel->parms.proto_type);
1150 + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
1151 + __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1153 + if (tunnel->parms.o_flags&GRE_SEQ) {
1154 + ++tunnel->o_seqno;
1155 + *ptr = htonl(tunnel->o_seqno);
1158 + if (tunnel->parms.o_flags&GRE_KEY) {
1159 + *ptr = tunnel->parms.o_key;
1162 + if (tunnel->parms.o_flags&GRE_CSUM) {
1164 + *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
1168 + printk(KERN_ALERT "gre.c:1006 Passed the tunnel transmit.\n");
1174 + tunnel->recursion--;
1178 + dst_link_failure(skb);
1181 + stats->tx_errors++;
1182 + dev_kfree_skb(skb);
1183 + tunnel->recursion--;
1189 +ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1192 + struct ip_tunnel_parm p;
1193 + struct ip_tunnel *t;
1195 + printk(KERN_ALERT "1174 GRE: entering gre ioctl. command is: %d\n", cmd);
1198 + case SIOCGETTUNNEL:
1200 + if (dev == ipgre_fb_tunnel_dev) {
1201 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1205 + t = ipgre_tunnel_locate(&p, 0);
1208 + t = netdev_priv(dev);
1209 + memcpy(&p, &t->parms, sizeof(p));
1210 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1214 + case SIOCADDTUNNEL:
1215 + case SIOCCHGTUNNEL:
1217 + if (!capable(CAP_NET_ADMIN))
1221 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1225 + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1226 + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1227 + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1230 + p.iph.frag_off |= htons(IP_DF);
1232 + if (!(p.i_flags&GRE_KEY))
1234 + if (!(p.o_flags&GRE_KEY))
1237 + t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
1238 + if (t) printk(KERN_ALERT "1174 GRE: proto %s %d\n", p.name, p.proto_type);
1239 + if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1241 + if (t->dev != dev) {
1246 + unsigned nflags=0;
1248 + t = netdev_priv(dev);
1250 + if (MULTICAST(p.iph.daddr))
1251 + nflags = IFF_BROADCAST;
1252 + else if (p.iph.daddr)
1253 + nflags = IFF_POINTOPOINT;
1255 + /* XXX:Set back IFF_BROADCAST if
1256 + * transporting ethernet */
1257 + printk(KERN_ALERT "1193 GRE: proto %s %d\n", p.name, p.proto_type);
1258 + if (p.proto_type == ETH_P_ETH)
1259 + nflags = IFF_BROADCAST;
1261 + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1265 + ipgre_tunnel_unlink(t);
1266 + t->parms.iph.saddr = p.iph.saddr;
1267 + t->parms.iph.daddr = p.iph.daddr;
1268 + t->parms.i_key = p.i_key;
1269 + t->parms.o_key = p.o_key;
1270 + /* XXX:Copy in the protocol field */
1271 + t->parms.proto_type = p.proto_type;
1272 + if (t->parms.proto_type != ETH_P_ETH)
1274 + memcpy(dev->dev_addr, &p.iph.saddr, 4);
1275 + memcpy(dev->broadcast, &p.iph.daddr, 4);
1277 + ipgre_tunnel_link(t);
1278 + netdev_state_change(dev);
1284 + if (cmd == SIOCCHGTUNNEL) {
1285 + t->parms.iph.ttl = p.iph.ttl;
1286 + t->parms.iph.tos = p.iph.tos;
1287 + t->parms.iph.frag_off = p.iph.frag_off;
1289 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1292 + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1295 + case SIOCDELTUNNEL:
1297 + if (!capable(CAP_NET_ADMIN))
1300 + if (dev == ipgre_fb_tunnel_dev) {
1302 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1305 + if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1308 + if (t == netdev_priv(ipgre_fb_tunnel_dev))
1312 + unregister_netdevice(dev); // added by Valas
1323 +static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1325 + return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1328 +static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1330 + struct ip_tunnel *tunnel = netdev_priv(dev);
1331 + if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1333 + dev->mtu = new_mtu;
1337 +#ifdef CONFIG_NET_IPGRE_BROADCAST
1338 +/* Nice toy. Unfortunately, useless in real life :-)
1339 + It allows to construct virtual multiprotocol broadcast "LAN"
1340 + over the Internet, provided multicast routing is tuned.
1343 + I have no idea was this bicycle invented before me,
1344 + so that I had to set ARPHRD_IPGRE to a random value.
1345 + I have an impression, that Cisco could make something similar,
1346 + but this feature is apparently missing in IOS<=11.2(8).
1348 + I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1349 + with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1351 + ping -t 255 224.66.66.66
1353 + If nobody answers, mbone does not work.
1355 + ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1356 + ip addr add 10.66.66.<somewhat>/24 dev Universe
1357 + ifconfig Universe up
1358 + ifconfig Universe add fe80::<Your_real_addr>/10
1359 + ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1362 + ftp fec0:6666:6666::193.233.7.65
1367 +static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1368 + void *daddr, void *saddr, unsigned len)
1370 + struct ip_tunnel *t = netdev_priv(dev);
1371 + struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1372 + __be16 *p = (__be16*)(iph+1);
1374 + memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1375 + p[0] = t->parms.o_flags;
1376 + p[1] = htons(type);
1379 + * Set the source hardware address.
1383 + memcpy(&iph->saddr, saddr, 4);
1386 + memcpy(&iph->daddr, daddr, 4);
1389 + if (iph->daddr && !MULTICAST(iph->daddr))
1395 +static int ipgre_open(struct net_device *dev)
1397 + struct ip_tunnel *t = netdev_priv(dev);
1399 + if (MULTICAST(t->parms.iph.daddr)) {
1400 + struct flowi fl = { .fl_net = &init_net,
1401 + .oif = t->parms.link,
1402 + .nl_u = { .ip4_u =
1403 + { .daddr = t->parms.iph.daddr,
1404 + .saddr = t->parms.iph.saddr,
1405 + .tos = RT_TOS(t->parms.iph.tos) } },
1406 + .proto = IPPROTO_GRE };
1407 + struct rtable *rt;
1408 + if (ip_route_output_key(&rt, &fl))
1409 + return -EADDRNOTAVAIL;
1410 + dev = rt->u.dst.dev;
1412 + if (__in_dev_get_rtnl(dev) == NULL)
1413 + return -EADDRNOTAVAIL;
1414 + t->mlink = dev->ifindex;
1415 + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1420 +static int ipgre_close(struct net_device *dev)
1422 + struct ip_tunnel *t = netdev_priv(dev);
1423 + if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
1424 + struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink);
1426 + ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1427 + in_dev_put(in_dev);
1435 +static void ipgre_ip_tunnel_setup(struct net_device *dev)
1437 + SET_MODULE_OWNER(dev);
1438 + dev->uninit = ipgre_tunnel_uninit;
1439 + dev->destructor = free_netdev;
1440 + dev->hard_start_xmit = ipgre_ip_tunnel_xmit;
1441 + dev->get_stats = ipgre_tunnel_get_stats;
1442 + dev->do_ioctl = ipgre_tunnel_ioctl;
1443 + dev->change_mtu = ipgre_tunnel_change_mtu;
1445 + dev->type = ARPHRD_IPGRE;
1446 + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1447 + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1448 + dev->flags = IFF_NOARP;
1450 + dev->addr_len = 4;
1453 +/* Tunnel setup for ipgre_eth */
1454 +static void ipgre_eth_tunnel_setup(struct net_device *dev)
1456 + SET_MODULE_OWNER(dev);
1459 + dev->uninit = ipgre_tunnel_uninit;
1460 + dev->destructor = free_netdev;
1461 + dev->hard_start_xmit = ipgre_eth_tunnel_xmit;
1462 + dev->get_stats = ipgre_tunnel_get_stats;
1463 + dev->do_ioctl = ipgre_tunnel_ioctl;
1464 + dev->change_mtu = ipgre_tunnel_change_mtu;
1466 + dev->hard_header_len = ETH_HLEN + sizeof(struct iphdr) + 4;
1467 + dev->tx_queue_len = 0;
1468 + random_ether_addr(dev->dev_addr);
1471 + unsigned char* d = dev->dev_addr;
1472 + printk(KERN_ALERT "Here is the address we got:%x%x%x%x%x%x\n",d[0],d[1],d[2],d[3],d[4],d[5]);
1479 +static int ipgre_tunnel_init(struct net_device *dev)
1481 + struct net_device *tdev = NULL;
1482 + struct ip_tunnel *tunnel;
1483 + struct iphdr *iph;
1484 + int hlen = LL_MAX_HEADER;
1485 + int mtu = ETH_DATA_LEN;
1486 + int addend = sizeof(struct iphdr) + 4;
1488 + tunnel = netdev_priv(dev);
1489 + iph = &tunnel->parms.iph;
1491 + tunnel->dev = dev;
1492 + strcpy(tunnel->parms.name, dev->name);
1494 + if (tunnel->parms.proto_type != ETH_P_ETH)
1496 + memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1497 + memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1500 + /* Guess output device to choose reasonable mtu and hard_header_len */
1503 + struct flowi fl = { .fl_net = &init_net,
1504 + .oif = tunnel->parms.link,
1505 + .nl_u = { .ip4_u =
1506 + { .daddr = iph->daddr,
1507 + .saddr = iph->saddr,
1508 + .tos = RT_TOS(iph->tos) } },
1509 + .proto = IPPROTO_GRE };
1510 + struct rtable *rt;
1511 + if (!ip_route_output_key(&rt, &fl)) {
1512 + tdev = rt->u.dst.dev;
1516 + if (tunnel->parms.proto_type == ETH_P_ETH)
1518 + dev->flags |= IFF_BROADCAST;
1522 + dev->flags |= IFF_POINTOPOINT;
1525 +#ifdef CONFIG_NET_IPGRE_BROADCAST
1526 + if (MULTICAST(iph->daddr)) {
1529 + dev->flags = IFF_BROADCAST;
1530 + dev->hard_header = ipgre_header;
1531 + dev->open = ipgre_open;
1532 + dev->stop = ipgre_close;
1537 + if (!tdev && tunnel->parms.link)
1538 + tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
1541 + hlen = tdev->hard_header_len;
1544 + dev->iflink = tunnel->parms.link;
1546 + /* Precalculate GRE options length */
1547 + if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1548 + if (tunnel->parms.o_flags&GRE_CSUM)
1550 + if (tunnel->parms.o_flags&GRE_KEY)
1552 + if (tunnel->parms.o_flags&GRE_SEQ)
1555 + dev->hard_header_len = hlen + addend;
1556 + dev->mtu = mtu - addend;
1557 + tunnel->hlen = addend;
1561 +static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1563 + struct ip_tunnel *tunnel = netdev_priv(dev);
1564 + struct iphdr *iph = &tunnel->parms.iph;
1566 + tunnel->dev = dev;
1567 + strcpy(tunnel->parms.name, dev->name);
1570 + iph->protocol = IPPROTO_GRE;
1572 + tunnel->hlen = sizeof(struct iphdr) + 4;
1575 + tunnels_wc[0] = tunnel;
1580 +static struct net_protocol ipgre_protocol = {
1581 + .handler = ipgre_rcv,
1582 + .err_handler = ipgre_err,
1587 + * And now the modules code and kernel interface.
1590 +static int __init ipgre_init(void)
1594 + printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1596 + if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1597 + printk(KERN_INFO "ipgre init: can't add protocol\n");
1601 + ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1602 + ipgre_ip_tunnel_setup);
1603 + if (!ipgre_fb_tunnel_dev) {
1608 + ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1610 + if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1615 + free_netdev(ipgre_fb_tunnel_dev);
1617 + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1621 +static void __exit ipgre_destroy_tunnels(void)
1625 + for (prio = 0; prio < 4; prio++) {
1627 + for (h = 0; h < HASH_SIZE; h++) {
1628 + struct ip_tunnel *t;
1629 + while ((t = tunnels[prio][h]) != NULL)
1630 + unregister_netdevice(t->dev);
1635 +static void __exit ipgre_fini(void)
1637 + if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1638 + printk(KERN_INFO "ipgre close: can't remove protocol\n");
1641 + ipgre_destroy_tunnels();
1645 +module_init(ipgre_init);
1646 +module_exit(ipgre_fini);
1647 +MODULE_LICENSE("GPL");
1648 diff -Nurb linux-2.6.22-592/include/linux/if_ether.h linux-2.6.22-593/include/linux/if_ether.h
1649 --- linux-2.6.22-592/include/linux/if_ether.h 2007-07-08 19:32:17.000000000 -0400
1650 +++ linux-2.6.22-593/include/linux/if_ether.h 2008-03-15 10:51:27.000000000 -0400
1652 #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
1653 #define ETH_P_CUST 0x6006 /* DEC Customer use */
1654 #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
1655 +#define ETH_P_ETH 0x6558 /* Ethernet in Ethernet */
1656 #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
1657 #define ETH_P_ATALK 0x809B /* Appletalk DDP */
1658 #define ETH_P_AARP 0x80F3 /* Appletalk AARP */
1659 diff -Nurb linux-2.6.22-592/include/linux/if_tunnel.h linux-2.6.22-593/include/linux/if_tunnel.h
1660 --- linux-2.6.22-592/include/linux/if_tunnel.h 2007-07-08 19:32:17.000000000 -0400
1661 +++ linux-2.6.22-593/include/linux/if_tunnel.h 2008-03-15 10:51:27.000000000 -0400
1666 + __be16 proto_type; /*Added*/
1670 diff -Nurb linux-2.6.22-592/net/ipv4/ip_gre.c linux-2.6.22-593/net/ipv4/ip_gre.c
1671 --- linux-2.6.22-592/net/ipv4/ip_gre.c 2007-07-08 19:32:17.000000000 -0400
1672 +++ linux-2.6.22-593/net/ipv4/ip_gre.c 2008-03-15 10:51:27.000000000 -0400
1674 #include <linux/init.h>
1675 #include <linux/in6.h>
1676 #include <linux/inetdevice.h>
1677 +#include <linux/etherdevice.h> /**XXX added XXX */
1678 #include <linux/igmp.h>
1679 #include <linux/netfilter_ipv4.h>
1680 #include <linux/if_ether.h>
1682 #include <net/ip6_route.h>
1685 +//#define GRE_DEBUG 1
1688 Problems & solutions
1689 --------------------
1693 static int ipgre_tunnel_init(struct net_device *dev);
1694 -static void ipgre_tunnel_setup(struct net_device *dev);
1695 +static void ipgre_ip_tunnel_setup(struct net_device *dev);
1696 +static void ipgre_eth_tunnel_setup(struct net_device *dev);
1698 /* Fallback tunnel: no source, no destination, no key, no options */
1701 __be32 remote = parms->iph.daddr;
1702 __be32 local = parms->iph.saddr;
1703 __be32 key = parms->i_key;
1704 + __be16 proto = parms->proto_type;
1705 struct ip_tunnel *t, **tp, *nt;
1706 struct net_device *dev;
1707 char name[IFNAMSIZ];
1712 + printk(KERN_CRIT "Adding tunnel %s with key %d\n", parms->name, ntohl(key));
1715 strlcpy(name, parms->name, IFNAMSIZ);
1717 @@ -269,7 +276,20 @@
1721 - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
1722 + /* Tunnel creation: check payload type and call appropriate
1727 + dev = alloc_netdev(sizeof(*t), name, ipgre_ip_tunnel_setup);
1730 + dev = alloc_netdev(sizeof(*t), name, ipgre_eth_tunnel_setup);
1741 struct ip_tunnel *tunnel;
1745 if (!pskb_may_pull(skb, 16))
1747 @@ -566,6 +587,11 @@
1749 flags = *(__be16*)h;
1752 + printk(KERN_DEBUG "gre.c [601] src:%x dst:%x proto:%d %p", iph->saddr, iph->daddr, iph->protocol, skb->data);
1754 + proto = ntohs(*(__be16*)(h+2)); /* XXX added XXX */
1756 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
1757 /* - Version must be 0.
1758 - We do not support routing headers.
1759 @@ -617,6 +643,27 @@
1760 __pskb_pull(skb, offset);
1761 skb_reset_network_header(skb);
1762 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
1763 + if(proto == ETH_P_ETH)
1766 + unsigned char* tmp_hdr = skb->data;
1767 + printk(KERN_DEBUG "gre.c [658] %x %x %x %x %x %x\tskb %p\n", tmp_hdr[0], tmp_hdr[1], tmp_hdr[2], tmp_hdr[3], tmp_hdr[4], tmp_hdr[5], skb->data);
1769 + skb->protocol = eth_type_trans(skb, tunnel->dev);
1771 + /* XXX added these lines to make arp work? XXX */
1772 + /*skb->mac.raw = skb->data;*/
1773 + skb->network_header = skb->network_header + ETH_HLEN;
1774 + /* XXX added these lines to make arp work? XXX */
1777 + tmp_hdr = skb->data;
1778 + printk(KERN_DEBUG "gre.c [669] %x %x %x %x %x %x\tskb %p\n", tmp_hdr[0], tmp_hdr[1], tmp_hdr[2], tmp_hdr[3], tmp_hdr[4], tmp_hdr[5], skb->data);
1779 + printk(KERN_ALERT "gre.c [671] received ethernet on gre %x\n",skb->protocol);
1781 + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1784 skb->pkt_type = PACKET_HOST;
1785 #ifdef CONFIG_NET_IPGRE_BROADCAST
1786 if (MULTICAST(iph->daddr)) {
1791 -static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1792 +static int ipgre_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1794 struct ip_tunnel *tunnel = netdev_priv(dev);
1795 struct net_device_stats *stats = &tunnel->stat;
1796 @@ -895,6 +942,228 @@
1800 +static int ipgre_eth_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1802 + struct ip_tunnel *tunnel = netdev_priv(dev);
1803 + struct net_device_stats *stats = &tunnel->stat;
1804 + struct iphdr *old_iph = ip_hdr(skb);
1805 + struct iphdr *tiph = &tunnel->parms.iph;
1808 + struct rtable *rt; /* Route to the other host */
1809 + struct net_device *tdev; /* Device to other host */
1810 + int gre_hlen = tunnel->hlen; /* XXX changed XXX*/
1811 + //struct etheriphdr *ethiph;
1812 + struct iphdr *iph; /* Our new IP header */
1813 + int max_headroom; /* The extra header space needed */
1817 + printk(KERN_ALERT "gre.c:972 Starting xmit\n");
1820 + if (tunnel->recursion++) {
1821 + stats->collisions++;
1825 + /* Need valid non-multicast daddr. */
1826 + if (tiph->daddr == 0 || MULTICAST(tiph->daddr))
1831 + if (skb->protocol == htons(ETH_P_IP))
1832 + tos = old_iph->tos;
1836 + printk(KERN_ALERT "gre.c:991 Passed tos assignment.\n");
1841 + struct flowi fl = { .fl_net = &init_net,
1842 + .oif = tunnel->parms.link,
1843 + .nl_u = { .ip4_u =
1844 + { .daddr = tiph->daddr,
1845 + .saddr = tiph->saddr,
1846 + .tos = RT_TOS(tos) } },
1847 + .proto = IPPROTO_GRE };
1848 + if (ip_route_output_key(&rt, &fl)) {
1849 + stats->tx_carrier_errors++;
1850 + goto tx_error_icmp;
1853 + tdev = rt->u.dst.dev;
1855 + printk(KERN_ALERT "gre.c:1006 Passed the route retrieval\n");
1857 + if (tdev == dev) {
1859 + stats->collisions++;
1863 + printk(KERN_ALERT "gre.c:1018 Passed tdev collision check.\n");
1866 + /* Check MTU stuff if kernel panic */
1867 + df = tiph->frag_off;
1869 + mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
1871 + mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
1874 + skb->dst->ops->update_pmtu(skb->dst, mtu);
1877 + printk(KERN_ALERT "gre.c:1032 Passed the pmtu setting.\n");
1880 + if (skb->protocol == htons(ETH_P_IP)) {
1881 + df |= (old_iph->frag_off&htons(IP_DF));
1883 + if ((old_iph->frag_off & htons(IP_DF)) &&
1884 + mtu < ntohs(old_iph->tot_len)) {
1885 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1891 + else if (skb->protocol == htons(ETH_P_IPV6)) {
1892 + struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
1894 + if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
1895 + if (tiph->daddr || rt6->rt6i_dst.plen == 128) {
1896 + rt6->rt6i_flags |= RTF_MODIFIED;
1897 + skb->dst->metrics[RTAX_MTU-1] = mtu;
1901 + /* @@@ Is this correct? */
1902 + if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
1903 + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1910 + printk(KERN_ALERT "gre.c:1065 Passed the fragmentation check.\n");
1913 + if (tunnel->err_count > 0) {
1914 + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
1915 + tunnel->err_count--;
1916 + dst_link_failure(skb);
1918 + tunnel->err_count = 0;
1921 + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
1923 + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
1924 + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
1927 + stats->tx_dropped++;
1928 + dev_kfree_skb(skb);
1929 + tunnel->recursion--;
1933 + skb_set_owner_w(new_skb, skb->sk);
1934 + dev_kfree_skb(skb);
1936 + old_iph = ip_hdr(skb);
1939 + printk(KERN_ALERT "gre.c:1094 Passed the headroom calculation\n");
1942 + skb->transport_header = skb->data;
1943 + skb_push(skb, gre_hlen);
1944 + skb_reset_network_header(skb);
1945 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1946 + dst_release(skb->dst);
1947 + skb->dst = &rt->u.dst;
1950 + * Push down and install the etherip header.
1953 + iph = ip_hdr(skb);
1955 + iph->ihl = sizeof(struct iphdr) >> 2;
1956 + iph->frag_off = df;
1957 + iph->protocol = IPPROTO_GRE;
1958 + iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
1959 + iph->daddr = rt->rt_dst;
1960 + iph->saddr = rt->rt_src;
1962 +/* ethiph->version = htons(ETHERIP_VERSION); */
1964 + printk(KERN_ALERT "gre.c:1121 Passed outer IP header construction.\n");
1967 + if ((iph->ttl = tiph->ttl) == 0) {
1968 + if (skb->protocol == htons(ETH_P_IP))
1969 + iph->ttl = old_iph->ttl;
1971 + else if (skb->protocol == htons(ETH_P_IPV6))
1972 + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
1975 + iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
1978 + printk(KERN_ALERT "gre.c:1006 Passed the TTL check.\n");
1981 + ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
1982 + ((__be16*)(iph+1))[1] = htons(tunnel->parms.proto_type);
1984 + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
1985 + __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
1987 + if (tunnel->parms.o_flags&GRE_SEQ) {
1988 + ++tunnel->o_seqno;
1989 + *ptr = htonl(tunnel->o_seqno);
1992 + if (tunnel->parms.o_flags&GRE_KEY) {
1993 + *ptr = tunnel->parms.o_key;
1996 + if (tunnel->parms.o_flags&GRE_CSUM) {
1998 + *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
2002 + printk(KERN_ALERT "gre.c:1006 Passed the tunnel transmit.\n");
2008 + tunnel->recursion--;
2012 + dst_link_failure(skb);
2015 + stats->tx_errors++;
2016 + dev_kfree_skb(skb);
2017 + tunnel->recursion--;
2023 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
2025 @@ -902,6 +1171,8 @@
2026 struct ip_tunnel_parm p;
2027 struct ip_tunnel *t;
2029 + printk(KERN_ALERT "1174 GRE: entering gre ioctl. command is: %d\n", cmd);
2034 @@ -943,7 +1214,7 @@
2037 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
2039 + if (t) printk(KERN_ALERT "1174 GRE: proto %s %x\n", p.name, p.proto_type);
2040 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
2042 if (t->dev != dev) {
2043 @@ -960,6 +1231,12 @@
2044 else if (p.iph.daddr)
2045 nflags = IFF_POINTOPOINT;
2047 + /* XXX:Set back IFF_BROADCAST if
2048 + * transporting ethernet */
2049 + printk(KERN_ALERT "1193 GRE: proto %s %d\n", p.name, p.proto_type);
2050 + if (p.proto_type == ETH_P_ETH)
2051 + nflags = IFF_BROADCAST;
2053 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
2056 @@ -969,8 +1246,12 @@
2057 t->parms.iph.daddr = p.iph.daddr;
2058 t->parms.i_key = p.i_key;
2059 t->parms.o_key = p.o_key;
2060 + /* XXX:Copy in the protocol field */
2061 + t->parms.proto_type = p.proto_type;
2062 + if (t->parms.proto_type != ETH_P_ETH) {
2063 memcpy(dev->dev_addr, &p.iph.saddr, 4);
2064 memcpy(dev->broadcast, &p.iph.daddr, 4);
2066 ipgre_tunnel_link(t);
2067 netdev_state_change(dev);
2069 @@ -1129,12 +1410,12 @@
2073 -static void ipgre_tunnel_setup(struct net_device *dev)
2074 +static void ipgre_ip_tunnel_setup(struct net_device *dev)
2076 SET_MODULE_OWNER(dev);
2077 dev->uninit = ipgre_tunnel_uninit;
2078 dev->destructor = free_netdev;
2079 - dev->hard_start_xmit = ipgre_tunnel_xmit;
2080 + dev->hard_start_xmit = ipgre_ip_tunnel_xmit;
2081 dev->get_stats = ipgre_tunnel_get_stats;
2082 dev->do_ioctl = ipgre_tunnel_ioctl;
2083 dev->change_mtu = ipgre_tunnel_change_mtu;
2084 @@ -1147,6 +1428,35 @@
2088 +/* Tunnel setup for ipgre_eth */
2089 +static void ipgre_eth_tunnel_setup(struct net_device *dev)
2091 + SET_MODULE_OWNER(dev);
2093 + // Set default values for Ethernet device
2096 + dev->uninit = ipgre_tunnel_uninit;
2097 + dev->destructor = free_netdev;
2098 + dev->hard_start_xmit = ipgre_eth_tunnel_xmit;
2099 + dev->get_stats = ipgre_tunnel_get_stats;
2100 + dev->do_ioctl = ipgre_tunnel_ioctl;
2101 + dev->change_mtu = ipgre_tunnel_change_mtu;
2103 + dev->hard_header_len = LL_MAX_HEADER + ETH_HLEN + sizeof(struct iphdr) + 4;
2104 + dev->mtu = ETH_DATA_LEN - ETH_HLEN - sizeof(struct iphdr) - 4;
2105 + dev->tx_queue_len = 0;
2108 + random_ether_addr(dev->dev_addr);
2111 + { unsigned char* d = dev->dev_addr;
2112 + printk(KERN_ALERT "Here is the address we got:%x%x%x%x%x%x\n",d[0],d[1],d[2],d[3],d[4],d[5]); }
2117 static int ipgre_tunnel_init(struct net_device *dev)
2119 struct net_device *tdev = NULL;
2120 @@ -1162,8 +1472,12 @@
2122 strcpy(tunnel->parms.name, dev->name);
2124 + if (tunnel->parms.proto_type != ETH_P_ETH) {
2125 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
2126 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
2128 + addend += ETH_HLEN;
2131 /* Guess output device to choose reasonable mtu and hard_header_len */
2133 @@ -1180,7 +1494,14 @@
2137 + if (tunnel->parms.proto_type == ETH_P_ETH)
2139 + dev->flags |= IFF_BROADCAST;
2143 dev->flags |= IFF_POINTOPOINT;
2146 #ifdef CONFIG_NET_IPGRE_BROADCAST
2147 if (MULTICAST(iph->daddr)) {
2148 @@ -1259,7 +1580,7 @@
2151 ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
2152 - ipgre_tunnel_setup);
2153 + ipgre_ip_tunnel_setup);
2154 if (!ipgre_fb_tunnel_dev) {
2157 diff -Nurb linux-2.6.22-592/net/ipv4/ip_gre.c.orig linux-2.6.22-593/net/ipv4/ip_gre.c.orig
2158 --- linux-2.6.22-592/net/ipv4/ip_gre.c.orig 1969-12-31 19:00:00.000000000 -0500
2159 +++ linux-2.6.22-593/net/ipv4/ip_gre.c.orig 2007-07-08 19:32:17.000000000 -0400
2162 + * Linux NET3: GRE over IP protocol decoder.
2164 + * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
2166 + * This program is free software; you can redistribute it and/or
2167 + * modify it under the terms of the GNU General Public License
2168 + * as published by the Free Software Foundation; either version
2169 + * 2 of the License, or (at your option) any later version.
2173 +#include <linux/capability.h>
2174 +#include <linux/module.h>
2175 +#include <linux/types.h>
2176 +#include <linux/kernel.h>
2177 +#include <asm/uaccess.h>
2178 +#include <linux/skbuff.h>
2179 +#include <linux/netdevice.h>
2180 +#include <linux/in.h>
2181 +#include <linux/tcp.h>
2182 +#include <linux/udp.h>
2183 +#include <linux/if_arp.h>
2184 +#include <linux/mroute.h>
2185 +#include <linux/init.h>
2186 +#include <linux/in6.h>
2187 +#include <linux/inetdevice.h>
2188 +#include <linux/igmp.h>
2189 +#include <linux/netfilter_ipv4.h>
2190 +#include <linux/if_ether.h>
2192 +#include <net/sock.h>
2193 +#include <net/ip.h>
2194 +#include <net/icmp.h>
2195 +#include <net/protocol.h>
2196 +#include <net/ipip.h>
2197 +#include <net/arp.h>
2198 +#include <net/checksum.h>
2199 +#include <net/dsfield.h>
2200 +#include <net/inet_ecn.h>
2201 +#include <net/xfrm.h>
2204 +#include <net/ipv6.h>
2205 +#include <net/ip6_fib.h>
2206 +#include <net/ip6_route.h>
2210 + Problems & solutions
2211 + --------------------
2213 + 1. The most important issue is detecting local dead loops.
2214 + They would cause complete host lockup in transmit, which
2215 + would be "resolved" by stack overflow or, if queueing is enabled,
2216 + with infinite looping in net_bh.
2218 + We cannot track such dead loops during route installation,
2219 + it is infeasible task. The most general solutions would be
2220 + to keep skb->encapsulation counter (sort of local ttl),
2221 + and silently drop packet when it expires. It is the best
2222 + solution, but it supposes maintaining a new variable in ALL
2223 + skb, even if no tunneling is used.
2225 + Current solution: t->recursion lock breaks dead loops. It looks
2226 + like dev->tbusy flag, but I preferred new variable, because
2227 + the semantics is different. One day, when hard_start_xmit
2228 + will be multithreaded we will have to use skb->encapsulation.
2232 + 2. Networking dead loops would not kill routers, but would really
2233 + kill network. IP hop limit plays role of "t->recursion" in this case,
2234 + if we copy it from packet being encapsulated to upper header.
2235 + It is very good solution, but it introduces two problems:
2237 + - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
2238 + do not work over tunnels.
2239 + - traceroute does not work. I planned to relay ICMP from tunnel,
2240 + so that this problem would be solved and traceroute output
2241 + would even more informative. This idea appeared to be wrong:
2242 + only Linux complies to rfc1812 now (yes, guys, Linux is the only
2243 + true router now :-)), all routers (at least, in neighbourhood of mine)
2244 + return only 8 bytes of payload. It is the end.
2246 + Hence, if we want that OSPF worked or traceroute said something reasonable,
2247 + we should search for another solution.
2249 + One of them is to parse packet trying to detect inner encapsulation
2250 + made by our node. It is difficult or even impossible, especially,
2251 + taking into account fragmentation. To be short, it is not a solution at all.
2253 + Current solution: The solution was UNEXPECTEDLY SIMPLE.
2254 + We force DF flag on tunnels with preconfigured hop limit,
2255 + that is ALL. :-) Well, it does not remove the problem completely,
2256 + but exponential growth of network traffic is changed to linear
2257 + (branches, that exceed pmtu are pruned) and tunnel mtu
2258 + quickly degrades to a value <68, where looping stops.
2259 + Yes, it is not good if there exists a router in the loop,
2260 + which does not force DF, even when encapsulating packets have DF set.
2261 + But it is not our problem! Nobody could accuse us, we made
2262 + all that we could make. Even if it is your gated who injected
2263 + fatal route to network, even if it were you who configured
2264 + fatal static route: you are innocent. :-)
2268 + 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
2269 + practically identical code. It would be good to glue them
2270 + together, but it is not very evident, how to make them modular.
2271 + sit is integral part of IPv6, ipip and gre are naturally modular.
2272 + We could extract common parts (hash table, ioctl etc)
2273 + to a separate module (ip_tunnel.c).
2278 +static int ipgre_tunnel_init(struct net_device *dev);
2279 +static void ipgre_tunnel_setup(struct net_device *dev);
2281 +/* Fallback tunnel: no source, no destination, no key, no options */
2283 +static int ipgre_fb_tunnel_init(struct net_device *dev);
2285 +static struct net_device *ipgre_fb_tunnel_dev;
2287 +/* Tunnel hash table */
2297 + We require exact key match i.e. if a key is present in packet
2298 + it will match only tunnel with the same key; if it is not present,
2299 + it will match only keyless tunnel.
2301 + All keysless packets, if not matched configured keyless tunnels
2302 + will match fallback tunnel.
2305 +#define HASH_SIZE 16
2306 +#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
2308 +static struct ip_tunnel *tunnels[4][HASH_SIZE];
2310 +#define tunnels_r_l (tunnels[3])
2311 +#define tunnels_r (tunnels[2])
2312 +#define tunnels_l (tunnels[1])
2313 +#define tunnels_wc (tunnels[0])
2315 +static DEFINE_RWLOCK(ipgre_lock);
2317 +/* Given src, dst and key, find appropriate for input tunnel. */
2319 +static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
2321 + unsigned h0 = HASH(remote);
2322 + unsigned h1 = HASH(key);
2323 + struct ip_tunnel *t;
2325 + for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
2326 + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
2327 + if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
2331 + for (t = tunnels_r[h0^h1]; t; t = t->next) {
2332 + if (remote == t->parms.iph.daddr) {
2333 + if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
2337 + for (t = tunnels_l[h1]; t; t = t->next) {
2338 + if (local == t->parms.iph.saddr ||
2339 + (local == t->parms.iph.daddr && MULTICAST(local))) {
2340 + if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
2344 + for (t = tunnels_wc[h1]; t; t = t->next) {
2345 + if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
2349 + if (ipgre_fb_tunnel_dev->flags&IFF_UP)
2350 + return netdev_priv(ipgre_fb_tunnel_dev);
2354 +static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
2356 + __be32 remote = parms->iph.daddr;
2357 + __be32 local = parms->iph.saddr;
2358 + __be32 key = parms->i_key;
2359 + unsigned h = HASH(key);
2364 + if (remote && !MULTICAST(remote)) {
2366 + h ^= HASH(remote);
2369 + return &tunnels[prio][h];
2372 +static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
2374 + return __ipgre_bucket(&t->parms);
2377 +static void ipgre_tunnel_link(struct ip_tunnel *t)
2379 + struct ip_tunnel **tp = ipgre_bucket(t);
2382 + write_lock_bh(&ipgre_lock);
2384 + write_unlock_bh(&ipgre_lock);
2387 +static void ipgre_tunnel_unlink(struct ip_tunnel *t)
2389 + struct ip_tunnel **tp;
2391 + for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
2393 + write_lock_bh(&ipgre_lock);
2395 + write_unlock_bh(&ipgre_lock);
2401 +static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
2403 + __be32 remote = parms->iph.daddr;
2404 + __be32 local = parms->iph.saddr;
2405 + __be32 key = parms->i_key;
2406 + struct ip_tunnel *t, **tp, *nt;
2407 + struct net_device *dev;
2408 + char name[IFNAMSIZ];
2410 + for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
2411 + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
2412 + if (key == t->parms.i_key)
2419 + if (parms->name[0])
2420 + strlcpy(name, parms->name, IFNAMSIZ);
2423 + for (i=1; i<100; i++) {
2424 + sprintf(name, "gre%d", i);
2425 + if (__dev_get_by_name(name) == NULL)
2432 + dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
2436 + dev->init = ipgre_tunnel_init;
2437 + nt = netdev_priv(dev);
2438 + nt->parms = *parms;
2440 + if (register_netdevice(dev) < 0) {
2446 + ipgre_tunnel_link(nt);
2453 +static void ipgre_tunnel_uninit(struct net_device *dev)
2455 + ipgre_tunnel_unlink(netdev_priv(dev));
2460 +static void ipgre_err(struct sk_buff *skb, u32 info)
2462 +#ifndef I_WISH_WORLD_WERE_PERFECT
2464 +/* It is not :-( All the routers (except for Linux) return only
2465 + 8 bytes of packet payload. It means, that precise relaying of
2466 + ICMP in the real Internet is absolutely infeasible.
2468 + Moreover, Cisco "wise men" put GRE key to the third word
2469 + in GRE header. It makes impossible maintaining even soft state for keyed
2470 + GRE tunnels with enabled checksum. Tell them "thank you".
2472 + Well, I wonder, rfc1812 was written by Cisco employee,
2473 + what the hell these idiots break standards established
2477 + struct iphdr *iph = (struct iphdr*)skb->data;
2478 + __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
2479 + int grehlen = (iph->ihl<<2) + 4;
2480 + const int type = icmp_hdr(skb)->type;
2481 + const int code = icmp_hdr(skb)->code;
2482 + struct ip_tunnel *t;
2486 + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
2487 + if (flags&(GRE_VERSION|GRE_ROUTING))
2489 + if (flags&GRE_KEY) {
2491 + if (flags&GRE_CSUM)
2496 + /* If only 8 bytes returned, keyed message will be dropped here */
2497 + if (skb_headlen(skb) < grehlen)
2502 + case ICMP_PARAMETERPROB:
2505 + case ICMP_DEST_UNREACH:
2507 + case ICMP_SR_FAILED:
2508 + case ICMP_PORT_UNREACH:
2509 + /* Impossible event. */
2511 + case ICMP_FRAG_NEEDED:
2512 + /* Soft state for pmtu is maintained by IP core. */
2515 + /* All others are translated to HOST_UNREACH.
2516 + rfc2003 contains "deep thoughts" about NET_UNREACH,
2517 + I believe they are just ether pollution. --ANK
2522 + case ICMP_TIME_EXCEEDED:
2523 + if (code != ICMP_EXC_TTL)
2528 + read_lock(&ipgre_lock);
2529 + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
2530 + if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
2533 + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
2536 + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
2540 + t->err_time = jiffies;
2542 + read_unlock(&ipgre_lock);
2545 + struct iphdr *iph = (struct iphdr*)dp;
2546 + struct iphdr *eiph;
2547 + __be16 *p = (__be16*)(dp+(iph->ihl<<2));
2548 + const int type = icmp_hdr(skb)->type;
2549 + const int code = icmp_hdr(skb)->code;
2552 + __be32 rel_info = 0;
2555 + int grehlen = (iph->ihl<<2) + 4;
2556 + struct sk_buff *skb2;
2558 + struct rtable *rt;
2560 + if (p[1] != htons(ETH_P_IP))
2564 + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
2565 + if (flags&(GRE_VERSION|GRE_ROUTING))
2567 + if (flags&GRE_CSUM)
2569 + if (flags&GRE_KEY)
2571 + if (flags&GRE_SEQ)
2574 + if (len < grehlen + sizeof(struct iphdr))
2576 + eiph = (struct iphdr*)(dp + grehlen);
2581 + case ICMP_PARAMETERPROB:
2582 + n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
2583 + if (n < (iph->ihl<<2))
2586 + /* So... This guy found something strange INSIDE encapsulated
2587 + packet. Well, he is fool, but what can we do ?
2589 + rel_type = ICMP_PARAMETERPROB;
2591 + rel_info = htonl(n << 24);
2594 + case ICMP_DEST_UNREACH:
2596 + case ICMP_SR_FAILED:
2597 + case ICMP_PORT_UNREACH:
2598 + /* Impossible event. */
2600 + case ICMP_FRAG_NEEDED:
2601 + /* And it is the only really necessary thing :-) */
2602 + n = ntohs(icmp_hdr(skb)->un.frag.mtu);
2603 + if (n < grehlen+68)
2606 + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
2607 + if (n > ntohs(eiph->tot_len))
2609 + rel_info = htonl(n);
2612 + /* All others are translated to HOST_UNREACH.
2613 + rfc2003 contains "deep thoughts" about NET_UNREACH,
2614 + I believe, it is just ether pollution. --ANK
2616 + rel_type = ICMP_DEST_UNREACH;
2617 + rel_code = ICMP_HOST_UNREACH;
2621 + case ICMP_TIME_EXCEEDED:
2622 + if (code != ICMP_EXC_TTL)
2627 + /* Prepare fake skb to feed it to icmp_send */
2628 + skb2 = skb_clone(skb, GFP_ATOMIC);
2631 + dst_release(skb2->dst);
2633 + skb_pull(skb2, skb->data - (u8*)eiph);
2634 + skb_reset_network_header(skb2);
2636 + /* Try to guess incoming interface */
2637 + memset(&fl, 0, sizeof(fl));
2638 + fl.fl4_dst = eiph->saddr;
2639 + fl.fl4_tos = RT_TOS(eiph->tos);
2640 + fl.proto = IPPROTO_GRE;
2641 + if (ip_route_output_key(&rt, &fl)) {
2645 + skb2->dev = rt->u.dst.dev;
2647 + /* route "incoming" packet */
2648 + if (rt->rt_flags&RTCF_LOCAL) {
2651 + fl.fl4_dst = eiph->daddr;
2652 + fl.fl4_src = eiph->saddr;
2653 + fl.fl4_tos = eiph->tos;
2654 + if (ip_route_output_key(&rt, &fl) ||
2655 + rt->u.dst.dev->type != ARPHRD_IPGRE) {
2662 + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
2663 + skb2->dst->dev->type != ARPHRD_IPGRE) {
2669 + /* change mtu on this route */
2670 + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
2671 + if (n > dst_mtu(skb2->dst)) {
2675 + skb2->dst->ops->update_pmtu(skb2->dst, n);
2676 + } else if (type == ICMP_TIME_EXCEEDED) {
2677 + struct ip_tunnel *t = netdev_priv(skb2->dev);
2678 + if (t->parms.iph.ttl) {
2679 + rel_type = ICMP_DEST_UNREACH;
2680 + rel_code = ICMP_HOST_UNREACH;
2684 + icmp_send(skb2, rel_type, rel_code, rel_info);
2689 +static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
2691 + if (INET_ECN_is_ce(iph->tos)) {
2692 + if (skb->protocol == htons(ETH_P_IP)) {
2693 + IP_ECN_set_ce(ip_hdr(skb));
2694 + } else if (skb->protocol == htons(ETH_P_IPV6)) {
2695 + IP6_ECN_set_ce(ipv6_hdr(skb));
2701 +ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
2704 + if (skb->protocol == htons(ETH_P_IP))
2705 + inner = old_iph->tos;
2706 + else if (skb->protocol == htons(ETH_P_IPV6))
2707 + inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
2708 + return INET_ECN_encapsulate(tos, inner);
2711 +static int ipgre_rcv(struct sk_buff *skb)
2713 + struct iphdr *iph;
2719 + struct ip_tunnel *tunnel;
2722 + if (!pskb_may_pull(skb, 16))
2725 + iph = ip_hdr(skb);
2727 + flags = *(__be16*)h;
2729 + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
2730 + /* - Version must be 0.
2731 + - We do not support routing headers.
2733 + if (flags&(GRE_VERSION|GRE_ROUTING))
2736 + if (flags&GRE_CSUM) {
2737 + switch (skb->ip_summed) {
2738 + case CHECKSUM_COMPLETE:
2739 + csum = csum_fold(skb->csum);
2742 + /* fall through */
2743 + case CHECKSUM_NONE:
2745 + csum = __skb_checksum_complete(skb);
2746 + skb->ip_summed = CHECKSUM_COMPLETE;
2750 + if (flags&GRE_KEY) {
2751 + key = *(__be32*)(h + offset);
2754 + if (flags&GRE_SEQ) {
2755 + seqno = ntohl(*(__be32*)(h + offset));
2760 + read_lock(&ipgre_lock);
2761 + if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
2762 + secpath_reset(skb);
2764 + skb->protocol = *(__be16*)(h + 2);
2765 + /* WCCP version 1 and 2 protocol decoding.
2766 + * - Change protocol to IP
2767 + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
2770 + skb->protocol == htons(ETH_P_WCCP)) {
2771 + skb->protocol = htons(ETH_P_IP);
2772 + if ((*(h + offset) & 0xF0) != 0x40)
2776 + skb_reset_mac_header(skb);
2777 + __pskb_pull(skb, offset);
2778 + skb_reset_network_header(skb);
2779 + skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
2780 + skb->pkt_type = PACKET_HOST;
2781 +#ifdef CONFIG_NET_IPGRE_BROADCAST
2782 + if (MULTICAST(iph->daddr)) {
2783 + /* Looped back packet, drop it! */
2784 + if (((struct rtable*)skb->dst)->fl.iif == 0)
2786 + tunnel->stat.multicast++;
2787 + skb->pkt_type = PACKET_BROADCAST;
2791 + if (((flags&GRE_CSUM) && csum) ||
2792 + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
2793 + tunnel->stat.rx_crc_errors++;
2794 + tunnel->stat.rx_errors++;
2797 + if (tunnel->parms.i_flags&GRE_SEQ) {
2798 + if (!(flags&GRE_SEQ) ||
2799 + (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
2800 + tunnel->stat.rx_fifo_errors++;
2801 + tunnel->stat.rx_errors++;
2804 + tunnel->i_seqno = seqno + 1;
2806 + tunnel->stat.rx_packets++;
2807 + tunnel->stat.rx_bytes += skb->len;
2808 + skb->dev = tunnel->dev;
2809 + dst_release(skb->dst);
2812 + ipgre_ecn_decapsulate(iph, skb);
2814 + read_unlock(&ipgre_lock);
2817 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
2820 + read_unlock(&ipgre_lock);
2826 +static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
2828 + struct ip_tunnel *tunnel = netdev_priv(dev);
2829 + struct net_device_stats *stats = &tunnel->stat;
2830 + struct iphdr *old_iph = ip_hdr(skb);
2831 + struct iphdr *tiph;
2834 + struct rtable *rt; /* Route to the other host */
2835 + struct net_device *tdev; /* Device to other host */
2836 + struct iphdr *iph; /* Our new IP header */
2837 + int max_headroom; /* The extra header space needed */
2842 + if (tunnel->recursion++) {
2843 + tunnel->stat.collisions++;
2847 + if (dev->hard_header) {
2849 + tiph = (struct iphdr*)skb->data;
2851 + gre_hlen = tunnel->hlen;
2852 + tiph = &tunnel->parms.iph;
2855 + if ((dst = tiph->daddr) == 0) {
2858 + if (skb->dst == NULL) {
2859 + tunnel->stat.tx_fifo_errors++;
2863 + if (skb->protocol == htons(ETH_P_IP)) {
2864 + rt = (struct rtable*)skb->dst;
2865 + if ((dst = rt->rt_gateway) == 0)
2866 + goto tx_error_icmp;
2869 + else if (skb->protocol == htons(ETH_P_IPV6)) {
2870 + struct in6_addr *addr6;
2872 + struct neighbour *neigh = skb->dst->neighbour;
2874 + if (neigh == NULL)
2877 + addr6 = (struct in6_addr*)&neigh->primary_key;
2878 + addr_type = ipv6_addr_type(addr6);
2880 + if (addr_type == IPV6_ADDR_ANY) {
2881 + addr6 = &ipv6_hdr(skb)->daddr;
2882 + addr_type = ipv6_addr_type(addr6);
2885 + if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
2886 + goto tx_error_icmp;
2888 + dst = addr6->s6_addr32[3];
2897 + if (skb->protocol == htons(ETH_P_IP))
2898 + tos = old_iph->tos;
2903 + struct flowi fl = { .oif = tunnel->parms.link,
2904 + .nl_u = { .ip4_u =
2906 + .saddr = tiph->saddr,
2907 + .tos = RT_TOS(tos) } },
2908 + .proto = IPPROTO_GRE };
2909 + if (ip_route_output_key(&rt, &fl)) {
2910 + tunnel->stat.tx_carrier_errors++;
2914 + tdev = rt->u.dst.dev;
2916 + if (tdev == dev) {
2918 + tunnel->stat.collisions++;
2922 + df = tiph->frag_off;
2924 + mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
2926 + mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
2929 + skb->dst->ops->update_pmtu(skb->dst, mtu);
2931 + if (skb->protocol == htons(ETH_P_IP)) {
2932 + df |= (old_iph->frag_off&htons(IP_DF));
2934 + if ((old_iph->frag_off&htons(IP_DF)) &&
2935 + mtu < ntohs(old_iph->tot_len)) {
2936 + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
2942 + else if (skb->protocol == htons(ETH_P_IPV6)) {
2943 + struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
2945 + if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
2946 + if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
2947 + rt6->rt6i_dst.plen == 128) {
2948 + rt6->rt6i_flags |= RTF_MODIFIED;
2949 + skb->dst->metrics[RTAX_MTU-1] = mtu;
2953 + if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
2954 + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
2961 + if (tunnel->err_count > 0) {
2962 + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
2963 + tunnel->err_count--;
2965 + dst_link_failure(skb);
2967 + tunnel->err_count = 0;
2970 + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
2972 + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
2973 + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
2976 + stats->tx_dropped++;
2977 + dev_kfree_skb(skb);
2978 + tunnel->recursion--;
2982 + skb_set_owner_w(new_skb, skb->sk);
2983 + dev_kfree_skb(skb);
2985 + old_iph = ip_hdr(skb);
2988 + skb->transport_header = skb->network_header;
2989 + skb_push(skb, gre_hlen);
2990 + skb_reset_network_header(skb);
2991 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
2992 + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
2994 + dst_release(skb->dst);
2995 + skb->dst = &rt->u.dst;
2998 + * Push down and install the IPIP header.
3001 + iph = ip_hdr(skb);
3003 + iph->ihl = sizeof(struct iphdr) >> 2;
3004 + iph->frag_off = df;
3005 + iph->protocol = IPPROTO_GRE;
3006 + iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
3007 + iph->daddr = rt->rt_dst;
3008 + iph->saddr = rt->rt_src;
3010 + if ((iph->ttl = tiph->ttl) == 0) {
3011 + if (skb->protocol == htons(ETH_P_IP))
3012 + iph->ttl = old_iph->ttl;
3014 + else if (skb->protocol == htons(ETH_P_IPV6))
3015 + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
3018 + iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
3021 + ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
3022 + ((__be16*)(iph+1))[1] = skb->protocol;
3024 + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
3025 + __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
3027 + if (tunnel->parms.o_flags&GRE_SEQ) {
3028 + ++tunnel->o_seqno;
3029 + *ptr = htonl(tunnel->o_seqno);
3032 + if (tunnel->parms.o_flags&GRE_KEY) {
3033 + *ptr = tunnel->parms.o_key;
3036 + if (tunnel->parms.o_flags&GRE_CSUM) {
3038 + *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
3045 + tunnel->recursion--;
3049 + dst_link_failure(skb);
3052 + stats->tx_errors++;
3053 + dev_kfree_skb(skb);
3054 + tunnel->recursion--;
3059 +ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
3062 + struct ip_tunnel_parm p;
3063 + struct ip_tunnel *t;
3066 + case SIOCGETTUNNEL:
3068 + if (dev == ipgre_fb_tunnel_dev) {
3069 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
3073 + t = ipgre_tunnel_locate(&p, 0);
3076 + t = netdev_priv(dev);
3077 + memcpy(&p, &t->parms, sizeof(p));
3078 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
3082 + case SIOCADDTUNNEL:
3083 + case SIOCCHGTUNNEL:
3085 + if (!capable(CAP_NET_ADMIN))
3089 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
3093 + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
3094 + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
3095 + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
3098 + p.iph.frag_off |= htons(IP_DF);
3100 + if (!(p.i_flags&GRE_KEY))
3102 + if (!(p.o_flags&GRE_KEY))
3105 + t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
3107 + if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
3109 + if (t->dev != dev) {
3114 + unsigned nflags=0;
3116 + t = netdev_priv(dev);
3118 + if (MULTICAST(p.iph.daddr))
3119 + nflags = IFF_BROADCAST;
3120 + else if (p.iph.daddr)
3121 + nflags = IFF_POINTOPOINT;
3123 + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
3127 + ipgre_tunnel_unlink(t);
3128 + t->parms.iph.saddr = p.iph.saddr;
3129 + t->parms.iph.daddr = p.iph.daddr;
3130 + t->parms.i_key = p.i_key;
3131 + t->parms.o_key = p.o_key;
3132 + memcpy(dev->dev_addr, &p.iph.saddr, 4);
3133 + memcpy(dev->broadcast, &p.iph.daddr, 4);
3134 + ipgre_tunnel_link(t);
3135 + netdev_state_change(dev);
3141 + if (cmd == SIOCCHGTUNNEL) {
3142 + t->parms.iph.ttl = p.iph.ttl;
3143 + t->parms.iph.tos = p.iph.tos;
3144 + t->parms.iph.frag_off = p.iph.frag_off;
3146 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
3149 + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
3152 + case SIOCDELTUNNEL:
3154 + if (!capable(CAP_NET_ADMIN))
3157 + if (dev == ipgre_fb_tunnel_dev) {
3159 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
3162 + if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
3165 + if (t == netdev_priv(ipgre_fb_tunnel_dev))
3169 + unregister_netdevice(dev);
3181 +static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
3183 + return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
3186 +static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
3188 + struct ip_tunnel *tunnel = netdev_priv(dev);
3189 + if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
3191 + dev->mtu = new_mtu;
3195 +#ifdef CONFIG_NET_IPGRE_BROADCAST
3196 +/* Nice toy. Unfortunately, useless in real life :-)
3197 + It allows to construct virtual multiprotocol broadcast "LAN"
3198 + over the Internet, provided multicast routing is tuned.
3201 + I have no idea was this bicycle invented before me,
3202 + so that I had to set ARPHRD_IPGRE to a random value.
3203 + I have an impression, that Cisco could make something similar,
3204 + but this feature is apparently missing in IOS<=11.2(8).
3206 + I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
3207 + with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
3209 + ping -t 255 224.66.66.66
3211 + If nobody answers, mbone does not work.
3213 + ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
3214 + ip addr add 10.66.66.<somewhat>/24 dev Universe
3215 + ifconfig Universe up
3216 + ifconfig Universe add fe80::<Your_real_addr>/10
3217 + ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
3220 + ftp fec0:6666:6666::193.233.7.65
3225 +static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
3226 + void *daddr, void *saddr, unsigned len)
3228 + struct ip_tunnel *t = netdev_priv(dev);
3229 + struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
3230 + __be16 *p = (__be16*)(iph+1);
3232 + memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
3233 + p[0] = t->parms.o_flags;
3234 + p[1] = htons(type);
3237 + * Set the source hardware address.
3241 + memcpy(&iph->saddr, saddr, 4);
3244 + memcpy(&iph->daddr, daddr, 4);
3247 + if (iph->daddr && !MULTICAST(iph->daddr))
3253 +static int ipgre_open(struct net_device *dev)
3255 + struct ip_tunnel *t = netdev_priv(dev);
3257 + if (MULTICAST(t->parms.iph.daddr)) {
3258 + struct flowi fl = { .oif = t->parms.link,
3259 + .nl_u = { .ip4_u =
3260 + { .daddr = t->parms.iph.daddr,
3261 + .saddr = t->parms.iph.saddr,
3262 + .tos = RT_TOS(t->parms.iph.tos) } },
3263 + .proto = IPPROTO_GRE };
3264 + struct rtable *rt;
3265 + if (ip_route_output_key(&rt, &fl))
3266 + return -EADDRNOTAVAIL;
3267 + dev = rt->u.dst.dev;
3269 + if (__in_dev_get_rtnl(dev) == NULL)
3270 + return -EADDRNOTAVAIL;
3271 + t->mlink = dev->ifindex;
3272 + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
3277 +static int ipgre_close(struct net_device *dev)
3279 + struct ip_tunnel *t = netdev_priv(dev);
3280 + if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
3281 + struct in_device *in_dev = inetdev_by_index(t->mlink);
3283 + ip_mc_dec_group(in_dev, t->parms.iph.daddr);
3284 + in_dev_put(in_dev);
3292 +static void ipgre_tunnel_setup(struct net_device *dev)
3294 + SET_MODULE_OWNER(dev);
3295 + dev->uninit = ipgre_tunnel_uninit;
3296 + dev->destructor = free_netdev;
3297 + dev->hard_start_xmit = ipgre_tunnel_xmit;
3298 + dev->get_stats = ipgre_tunnel_get_stats;
3299 + dev->do_ioctl = ipgre_tunnel_ioctl;
3300 + dev->change_mtu = ipgre_tunnel_change_mtu;
3302 + dev->type = ARPHRD_IPGRE;
3303 + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
3304 + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
3305 + dev->flags = IFF_NOARP;
3307 + dev->addr_len = 4;
3310 +static int ipgre_tunnel_init(struct net_device *dev)
3312 + struct net_device *tdev = NULL;
3313 + struct ip_tunnel *tunnel;
3314 + struct iphdr *iph;
3315 + int hlen = LL_MAX_HEADER;
3316 + int mtu = ETH_DATA_LEN;
3317 + int addend = sizeof(struct iphdr) + 4;
3319 + tunnel = netdev_priv(dev);
3320 + iph = &tunnel->parms.iph;
3322 + tunnel->dev = dev;
3323 + strcpy(tunnel->parms.name, dev->name);
3325 + memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
3326 + memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
3328 + /* Guess output device to choose reasonable mtu and hard_header_len */
3331 + struct flowi fl = { .oif = tunnel->parms.link,
3332 + .nl_u = { .ip4_u =
3333 + { .daddr = iph->daddr,
3334 + .saddr = iph->saddr,
3335 + .tos = RT_TOS(iph->tos) } },
3336 + .proto = IPPROTO_GRE };
3337 + struct rtable *rt;
3338 + if (!ip_route_output_key(&rt, &fl)) {
3339 + tdev = rt->u.dst.dev;
3343 + dev->flags |= IFF_POINTOPOINT;
3345 +#ifdef CONFIG_NET_IPGRE_BROADCAST
3346 + if (MULTICAST(iph->daddr)) {
3349 + dev->flags = IFF_BROADCAST;
3350 + dev->hard_header = ipgre_header;
3351 + dev->open = ipgre_open;
3352 + dev->stop = ipgre_close;
3357 + if (!tdev && tunnel->parms.link)
3358 + tdev = __dev_get_by_index(tunnel->parms.link);
3361 + hlen = tdev->hard_header_len;
3364 + dev->iflink = tunnel->parms.link;
3366 + /* Precalculate GRE options length */
3367 + if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
3368 + if (tunnel->parms.o_flags&GRE_CSUM)
3370 + if (tunnel->parms.o_flags&GRE_KEY)
3372 + if (tunnel->parms.o_flags&GRE_SEQ)
3375 + dev->hard_header_len = hlen + addend;
3376 + dev->mtu = mtu - addend;
3377 + tunnel->hlen = addend;
3381 +static int __init ipgre_fb_tunnel_init(struct net_device *dev)
3383 + struct ip_tunnel *tunnel = netdev_priv(dev);
3384 + struct iphdr *iph = &tunnel->parms.iph;
3386 + tunnel->dev = dev;
3387 + strcpy(tunnel->parms.name, dev->name);
3390 + iph->protocol = IPPROTO_GRE;
3392 + tunnel->hlen = sizeof(struct iphdr) + 4;
3395 + tunnels_wc[0] = tunnel;
3400 +static struct net_protocol ipgre_protocol = {
3401 + .handler = ipgre_rcv,
3402 + .err_handler = ipgre_err,
3407 + * And now the modules code and kernel interface.
3410 +static int __init ipgre_init(void)
3414 + printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
3416 + if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
3417 + printk(KERN_INFO "ipgre init: can't add protocol\n");
3421 + ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
3422 + ipgre_tunnel_setup);
3423 + if (!ipgre_fb_tunnel_dev) {
3428 + ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
3430 + if ((err = register_netdev(ipgre_fb_tunnel_dev)))
3435 + free_netdev(ipgre_fb_tunnel_dev);
3437 + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
3441 +static void __exit ipgre_destroy_tunnels(void)
3445 + for (prio = 0; prio < 4; prio++) {
3447 + for (h = 0; h < HASH_SIZE; h++) {
3448 + struct ip_tunnel *t;
3449 + while ((t = tunnels[prio][h]) != NULL)
3450 + unregister_netdevice(t->dev);
3455 +static void __exit ipgre_fini(void)
3457 + if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
3458 + printk(KERN_INFO "ipgre close: can't remove protocol\n");
3461 + ipgre_destroy_tunnels();
3465 +module_init(ipgre_init);
3466 +module_exit(ipgre_fini);
3467 +MODULE_LICENSE("GPL");