1 /* ip_gre driver port to Linux 2.6.18 and greater */
3 #include <linux/version.h>
4 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5 #define HAVE_NETDEV_STATS
7 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
8 #define HAVE_NETDEV_HEADER_OPS
10 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
11 #define HAVE_NETDEV_NEEDED_HEADROOM
15 * Linux NET3: GRE over IP protocol decoder.
17 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
26 #include <linux/capability.h>
27 #include <linux/ethtool.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <asm/uaccess.h>
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
35 #include <linux/tcp.h>
36 #include <linux/udp.h>
37 #include <linux/if_arp.h>
38 #include <linux/mroute.h>
39 #include <linux/init.h>
40 #include <linux/in6.h>
41 #include <linux/inetdevice.h>
42 #include <linux/igmp.h>
43 #include <linux/netfilter_ipv4.h>
44 #include <linux/etherdevice.h>
45 #include <linux/if_ether.h>
50 #include <net/protocol.h>
53 #include <net/checksum.h>
54 #include <net/dsfield.h>
55 #include <net/inet_ecn.h>
57 #include <net/net_namespace.h>
58 #include <net/netns/generic.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
67 #include "openvswitch/gre.h"
69 #ifndef GRE_IOCTL_ONLY
70 #include <net/rtnetlink.h>
1. The most important issue is detecting local dead loops.
They would cause a complete host lockup in transmit, which
would be "resolved" by stack overflow or, if queueing is enabled,
by infinite looping in net_bh.
We cannot track such dead loops during route installation;
it is an infeasible task. The most general solution would be
to keep an skb->encapsulation counter (a sort of local ttl)
and silently drop the packet when it expires. It is the best
solution, but it requires maintaining a new variable in EVERY
skb, even if no tunneling is used.
89 Current solution: HARD_TX_LOCK lock breaks dead loops.
2. Networking dead loops would not kill routers, but would really
kill the network. The IP hop limit plays the role of "t->recursion" in
this case, if we copy it from the packet being encapsulated to the
upper header. It is a very good solution, but it introduces two problems:

- Routing protocols that use packets with ttl=1 (OSPF, RIP2)
do not work over tunnels.
- traceroute does not work. I planned to relay ICMP from the tunnel,
so that this problem would be solved and traceroute output
would be even more informative. This idea appeared to be wrong:
only Linux complies with rfc1812 now (yes, guys, Linux is the only
true router now :-)); all other routers (at least in my neighbourhood)
return only 8 bytes of payload. That is the end of it.
Hence, if we want OSPF to work or traceroute to say something reasonable,
we should search for another solution.

One of them is to parse the packet, trying to detect inner encapsulation
made by our own node. This is difficult or even impossible, especially
taking fragmentation into account. In short, it is no solution at all.
Current solution: The solution was UNEXPECTEDLY SIMPLE.
We force the DF flag on tunnels with a preconfigured hop limit,
and that is ALL. :-) Well, it does not remove the problem completely,
but the exponential growth of network traffic is changed to linear
(branches that exceed the pmtu are pruned), and the tunnel mtu
quickly degrades to a value <68, where looping stops.
(A minimal sketch of this rule follows this comment.)
Yes, it is not good if there is a router in the loop
that does not force DF even when the encapsulating packets have DF set.
But that is not our problem! Nobody could accuse us; we did
all that we could. Even if it was your gated that injected the
fatal route into the network, even if it was you who configured the
fatal static route: you are innocent. :-)
3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
practically identical code. It would be good to glue them
together, but it is not obvious how to make them modular.
sit is an integral part of IPv6, while ipip and gre are naturally
modular. We could extract the common parts (hash table, ioctl, etc.)
into a separate module (ip_tunnel.c).
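
/* A minimal sketch of the DF-forcing rule from point 2 above. The
 * helper is illustrative only, not part of this driver; compare the
 * similar assignment made in ipgre_tunnel_ioctl() below when a tunnel
 * is configured. */
#if 0
static void force_df_sketch(struct ip_tunnel_parm *p)
{
	/* A tunnel with a preconfigured hop limit always sends DF. */
	if (p->iph.ttl)
		p->iph.frag_off |= htons(IP_DF);
}
#endif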
139 #ifndef GRE_IOCTL_ONLY
140 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
141 static struct rtnl_link_ops ipgre_tap_ops __read_mostly;
143 static int ipgre_tunnel_init(struct net_device *dev);
144 static void ipgre_tunnel_setup(struct net_device *dev);
145 static void ipgre_tap_setup(struct net_device *dev);
146 static int ipgre_tunnel_bind_dev(struct net_device *dev);
150 static int ipgre_net_id __read_mostly;
152 struct ip_tunnel *tunnels[4][HASH_SIZE];
154 struct net_device *fb_tunnel_dev;
157 /* Tunnel hash table */
We require an exact key match, i.e. if a key is present in the packet
it will match only a tunnel with the same key; if it is not present,
it will match only a keyless tunnel.

All keyless packets that do not match a configured keyless tunnel
will match the fallback tunnel.
175 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
177 #define tunnels_r_l tunnels[3]
178 #define tunnels_r tunnels[2]
179 #define tunnels_l tunnels[1]
180 #define tunnels_wc tunnels[0]
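
/* Illustrative only: a fully-specified, keyed tunnel hashes into a
 * tunnels_r_l[] chain by folding the remote address and the key;
 * HASH() yields a 4-bit index, so each table has 16 chains: */
#if 0
static unsigned example_bucket(__be32 remote, __be32 key)
{
	return HASH(remote) ^ HASH(key);
}
#endif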
* Locking: hash tables are protected by RCU and a spinlock
184 static DEFINE_SPINLOCK(ipgre_lock);
186 #define for_each_ip_tunnel_rcu(start) \
187 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
/* Given src, dst and key, find the appropriate tunnel for the input packet. */
191 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
192 __be32 remote, __be32 local,
193 __be32 key, __be16 gre_proto)
195 struct net *net = dev_net(dev);
196 int link = dev->ifindex;
197 unsigned h0 = HASH(remote);
198 unsigned h1 = HASH(key);
199 struct ip_tunnel *t, *cand = NULL;
200 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
201 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
202 ARPHRD_ETHER : ARPHRD_IPGRE;
203 int score, cand_score = 4;
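
	/* Scoring scheme (assumed to follow the upstream driver): a link
	 * mismatch sets bit 0 of 'score', a device-type mismatch sets
	 * bit 1. A score of 0 is an exact match and is returned
	 * immediately; otherwise the lowest-scoring match seen so far is
	 * kept in 'cand'/'cand_score'. */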
205 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
206 if (local != t->parms.iph.saddr ||
207 remote != t->parms.iph.daddr ||
208 key != t->parms.i_key ||
209 !(t->dev->flags & IFF_UP))
212 if (t->dev->type != ARPHRD_IPGRE &&
213 t->dev->type != dev_type)
217 if (t->parms.link != link)
219 if (t->dev->type != dev_type)
224 if (score < cand_score) {
230 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
231 if (remote != t->parms.iph.daddr ||
232 key != t->parms.i_key ||
233 !(t->dev->flags & IFF_UP))
236 if (t->dev->type != ARPHRD_IPGRE &&
237 t->dev->type != dev_type)
241 if (t->parms.link != link)
243 if (t->dev->type != dev_type)
248 if (score < cand_score) {
254 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
255 if ((local != t->parms.iph.saddr &&
256 (local != t->parms.iph.daddr ||
257 !ipv4_is_multicast(local))) ||
258 key != t->parms.i_key ||
259 !(t->dev->flags & IFF_UP))
262 if (t->dev->type != ARPHRD_IPGRE &&
263 t->dev->type != dev_type)
267 if (t->parms.link != link)
269 if (t->dev->type != dev_type)
274 if (score < cand_score) {
280 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
281 if (t->parms.i_key != key ||
282 !(t->dev->flags & IFF_UP))
285 if (t->dev->type != ARPHRD_IPGRE &&
286 t->dev->type != dev_type)
290 if (t->parms.link != link)
292 if (t->dev->type != dev_type)
297 if (score < cand_score) {
306 dev = ign->fb_tunnel_dev;
307 if (dev->flags & IFF_UP)
308 return netdev_priv(dev);
313 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
314 struct ip_tunnel_parm *parms)
316 __be32 remote = parms->iph.daddr;
317 __be32 local = parms->iph.saddr;
318 __be32 key = parms->i_key;
319 unsigned h = HASH(key);
324 if (remote && !ipv4_is_multicast(remote)) {
329 return &ign->tunnels[prio][h];
332 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
335 return __ipgre_bucket(ign, &t->parms);
338 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
340 struct ip_tunnel **tp = ipgre_bucket(ign, t);
342 spin_lock_bh(&ipgre_lock);
344 rcu_assign_pointer(*tp, t);
345 spin_unlock_bh(&ipgre_lock);
348 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
350 struct ip_tunnel **tp;
352 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
354 spin_lock_bh(&ipgre_lock);
356 spin_unlock_bh(&ipgre_lock);
362 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
363 struct ip_tunnel_parm *parms,
366 __be32 remote = parms->iph.daddr;
367 __be32 local = parms->iph.saddr;
368 __be32 key = parms->i_key;
369 int link = parms->link;
370 struct ip_tunnel *t, **tp;
371 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
373 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
374 if (local == t->parms.iph.saddr &&
375 remote == t->parms.iph.daddr &&
376 key == t->parms.i_key &&
377 link == t->parms.link &&
378 type == t->dev->type)
384 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
385 struct ip_tunnel_parm *parms, int gretap, int create)
387 struct ip_tunnel *t, *nt;
388 struct net_device *dev;
390 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
392 t = ipgre_tunnel_find(net, parms, gretap ? ARPHRD_ETHER : ARPHRD_IPGRE);
397 strlcpy(name, parms->name, IFNAMSIZ);
399 sprintf(name, "gre%%d");
401 dev = alloc_netdev(sizeof(*t), name, gretap ? ipgre_tap_setup
402 : ipgre_tunnel_setup);
406 dev_net_set(dev, net);
408 if (strchr(name, '%')) {
409 if (dev_alloc_name(dev, name) < 0)
414 random_ether_addr(dev->dev_addr);
416 #ifndef GRE_IOCTL_ONLY
417 dev->rtnl_link_ops = gretap ? &ipgre_tap_ops : &ipgre_link_ops;
419 nt = netdev_priv(dev);
422 dev->mtu = ipgre_tunnel_bind_dev(dev);
424 if (register_netdevice(dev) < 0)
428 ipgre_tunnel_link(ign, nt);
436 static void ipgre_tunnel_uninit(struct net_device *dev)
438 struct net *net = dev_net(dev);
439 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
441 ipgre_tunnel_unlink(ign, netdev_priv(dev));
446 static void ipgre_err(struct sk_buff *skb, u32 info)
/* All the routers (except for Linux) return only
8 bytes of packet payload. It means that precise relaying of
ICMP in the real Internet is absolutely infeasible.

Moreover, Cisco "wise men" put the GRE key in the third word
of the GRE header. That makes it impossible to maintain even soft state
for keyed GRE tunnels with checksums enabled. Tell them "thank you".

Well, I wonder: rfc1812 was written by a Cisco employee,
so what the hell makes these idiots break the standards established
462 struct iphdr *iph = (struct iphdr *)skb->data;
463 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
464 int grehlen = (iph->ihl<<2) + 4;
465 const int type = icmp_hdr(skb)->type;
466 const int code = icmp_hdr(skb)->code;
470 if (skb_headlen(skb) < grehlen)
474 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
475 if (flags&(GRE_VERSION|GRE_ROUTING))
/* If only 8 bytes were returned, a keyed message will be dropped here. */
485 if (skb_headlen(skb) < grehlen)
490 case ICMP_PARAMETERPROB:
493 case ICMP_DEST_UNREACH:
496 case ICMP_PORT_UNREACH:
497 /* Impossible event. */
499 case ICMP_FRAG_NEEDED:
500 /* Soft state for pmtu is maintained by IP core. */
503 /* All others are translated to HOST_UNREACH.
504 rfc2003 contains "deep thoughts" about NET_UNREACH,
505 I believe they are just ether pollution. --ANK
510 case ICMP_TIME_EXCEEDED:
511 if (code != ICMP_EXC_TTL)
517 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
519 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
521 if (t == NULL || t->parms.iph.daddr == 0 ||
522 ipv4_is_multicast(t->parms.iph.daddr))
525 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
528 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
532 t->err_time = jiffies;
538 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
540 if (INET_ECN_is_ce(iph->tos)) {
541 if (skb->protocol == htons(ETH_P_IP)) {
542 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
543 + sizeof(struct iphdr) - skb->data)))
546 IP_ECN_set_ce(ip_hdr(skb));
547 } else if (skb->protocol == htons(ETH_P_IPV6)) {
548 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
549 + sizeof(struct ipv6hdr) - skb->data)))
552 IP6_ECN_set_ce(ipv6_hdr(skb));
558 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
561 if (skb->protocol == htons(ETH_P_IP))
562 inner = old_iph->tos;
563 else if (skb->protocol == htons(ETH_P_IPV6))
564 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
565 return INET_ECN_encapsulate(tos, inner);
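
/* For reference, the mapping done by INET_ECN_encapsulate() from
 * <net/inet_ecn.h>: the outer TOS keeps its DSCP bits and inherits
 * the inner ECN bits, except that CE is downgraded to ECT(0) so the
 * outer path has to observe congestion for itself. A sketch: */
#if 0
static u8 ecn_encapsulate_sketch(u8 outer, u8 inner)
{
	outer &= ~INET_ECN_MASK;
	outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK)
					: INET_ECN_ECT_0;
	return outer;
}
#endif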
568 static int ipgre_rcv(struct sk_buff *skb)
576 struct ip_tunnel *tunnel;
581 if (!pskb_may_pull(skb, 16))
588 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
589 /* - Version must be 0.
590 - We do not support routing headers.
592 if (flags&(GRE_VERSION|GRE_ROUTING))
595 if (flags&GRE_CSUM) {
596 switch (skb->ip_summed) {
597 case CHECKSUM_COMPLETE:
598 csum = csum_fold(skb->csum);
604 csum = __skb_checksum_complete(skb);
605 skb->ip_summed = CHECKSUM_COMPLETE;
610 key = *(__be32*)(h + offset);
614 seqno = ntohl(*(__be32*)(h + offset));
619 gre_proto = *(__be16 *)(h + 2);
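
	/* The option parsing above walks 'offset' past each optional
	 * word of the header: it starts at 4 (base header), GRE_CSUM
	 * advances it by 4, GRE_KEY by another 4, and GRE_SEQ reads the
	 * following word, mirroring the transmit-side layout built in
	 * ipgre_tunnel_xmit(). */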
622 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
623 iph->saddr, iph->daddr, key,
625 struct net_device_stats *stats;
626 #ifdef HAVE_NETDEV_STATS
627 stats = &tunnel->dev->stats;
629 stats = &tunnel->stat;
634 skb->protocol = gre_proto;
/* WCCP version 1 and 2 protocol decoding.
* - Change the protocol to IP.
* - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header.
639 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
640 skb->protocol = htons(ETH_P_IP);
641 if ((*(h + offset) & 0xF0) != 0x40)
645 skb->mac_header = skb->network_header;
646 __pskb_pull(skb, offset);
647 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
648 skb->pkt_type = PACKET_HOST;
649 #ifdef CONFIG_NET_IPGRE_BROADCAST
650 if (ipv4_is_multicast(iph->daddr)) {
651 /* Looped back packet, drop it! */
652 if (skb_rtable(skb)->fl.iif == 0)
655 skb->pkt_type = PACKET_BROADCAST;
659 if (((flags&GRE_CSUM) && csum) ||
660 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
661 stats->rx_crc_errors++;
665 if (tunnel->parms.i_flags&GRE_SEQ) {
666 if (!(flags&GRE_SEQ) ||
667 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
668 stats->rx_fifo_errors++;
672 tunnel->i_seqno = seqno + 1;
677 /* Warning: All skb pointers will be invalidated! */
678 if (tunnel->dev->type == ARPHRD_ETHER) {
679 if (!pskb_may_pull(skb, ETH_HLEN)) {
680 stats->rx_length_errors++;
686 skb->protocol = eth_type_trans(skb, tunnel->dev);
687 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
691 stats->rx_bytes += len;
692 skb->dev = tunnel->dev;
696 skb_reset_network_header(skb);
698 /* Invalidates pointers. */
699 ipgre_ecn_decapsulate(iph, skb);
705 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
714 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
716 struct ip_tunnel *tunnel = netdev_priv(dev);
717 struct net_device_stats *stats;
718 #ifdef HAVE_NETDEV_QUEUE_STATS
719 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
721 struct iphdr *old_iph = ip_hdr(skb);
725 struct rtable *rt; /* Route to the other host */
726 struct net_device *tdev; /* Device to other host */
727 struct iphdr *iph; /* Our new IP header */
728 unsigned int max_headroom; /* The extra header space needed */
732 u8 original_protocol;
734 #ifdef HAVE_NETDEV_STATS
737 stats = &tunnel->stat;
740 /* Validate the protocol headers before we try to use them. */
741 original_protocol = skb->protocol;
742 if (skb->protocol == htons(ETH_P_IP)) {
743 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
744 + sizeof(struct iphdr) - skb->data)))
746 } else if (skb->protocol == htons(ETH_P_IPV6)) {
747 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
748 + sizeof(struct ipv6hdr) - skb->data)))
752 if (dev->type == ARPHRD_ETHER)
753 IPCB(skb)->flags = 0;
755 #ifdef HAVE_NETDEV_HEADER_OPS
756 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
758 if (dev->hard_header && dev->type == ARPHRD_IPGRE) {
761 tiph = (struct iphdr *)skb->data;
763 gre_hlen = tunnel->hlen;
764 tiph = &tunnel->parms.iph;
767 if ((dst = tiph->daddr) == 0) {
770 if (skb_dst(skb) == NULL) {
771 stats->tx_fifo_errors++;
775 if (skb->protocol == htons(ETH_P_IP)) {
776 rt = skb_rtable(skb);
777 if ((dst = rt->rt_gateway) == 0)
781 else if (skb->protocol == htons(ETH_P_IPV6)) {
782 struct in6_addr *addr6;
784 struct neighbour *neigh = skb_dst(skb)->neighbour;
789 addr6 = (struct in6_addr *)&neigh->primary_key;
790 addr_type = ipv6_addr_type(addr6);
792 if (addr_type == IPV6_ADDR_ANY) {
793 addr6 = &ipv6_hdr(skb)->daddr;
794 addr_type = ipv6_addr_type(addr6);
797 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
800 dst = addr6->s6_addr32[3];
810 if (skb->protocol == htons(ETH_P_IP))
815 struct flowi fl = { .oif = tunnel->parms.link,
818 .saddr = tiph->saddr,
819 .tos = RT_TOS(tos) } },
820 .proto = IPPROTO_GRE };
821 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
822 stats->tx_carrier_errors++;
826 tdev = rt->u.dst.dev;
836 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
837 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
839 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
842 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
845 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
847 /* XXX: Temporarily allow fragmentation since DF doesn't
848 * do the right thing with bridging. */
850 if (skb->protocol == htons(ETH_P_IP)) {
851 df |= (old_iph->frag_off&htons(IP_DF));
853 if ((old_iph->frag_off&htons(IP_DF)) &&
854 mtu < ntohs(old_iph->tot_len)) {
855 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
861 else if (skb->protocol == htons(ETH_P_IPV6)) {
862 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
864 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
865 if ((tunnel->parms.iph.daddr &&
866 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
867 rt6->rt6i_dst.plen == 128) {
868 rt6->rt6i_flags |= RTF_MODIFIED;
869 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
873 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
874 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
881 if (tunnel->err_count > 0) {
882 if (time_before(jiffies,
883 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
886 dst_link_failure(skb);
888 tunnel->err_count = 0;
891 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
893 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
894 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
895 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
898 #ifdef HAVE_NETDEV_QUEUE_STATS
907 skb_set_owner_w(new_skb, skb->sk);
910 old_iph = ip_hdr(skb);
913 skb_reset_transport_header(skb);
914 skb_push(skb, gre_hlen);
915 skb_reset_network_header(skb);
916 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
917 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
920 skb_dst_set(skb, &rt->u.dst);
* Push down and install the outer IP header.
928 iph->ihl = sizeof(struct iphdr) >> 2;
930 iph->protocol = IPPROTO_GRE;
931 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
932 iph->daddr = rt->rt_dst;
933 iph->saddr = rt->rt_src;
935 if ((iph->ttl = tiph->ttl) == 0) {
936 if (skb->protocol == htons(ETH_P_IP))
937 iph->ttl = old_iph->ttl;
939 else if (skb->protocol == htons(ETH_P_IPV6))
940 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
943 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
946 skb->protocol = original_protocol;
948 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
949 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
950 htons(ETH_P_TEB) : skb->protocol;
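
	/* GRE header being assembled, per RFC 2784/2890; the optional
	 * words appear in this order when the matching o_flags are set:
	 *
	 *   2 bytes  flags (C|K|S...)     2 bytes  protocol type
	 *   4 bytes  checksum + reserved  (GRE_CSUM)
	 *   4 bytes  key                  (GRE_KEY)
	 *   4 bytes  sequence number      (GRE_SEQ)
	 *
	 * The writer below starts at the last option word and walks
	 * backwards, which is why 'ptr' begins at hlen - 4. */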
952 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
953 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
955 if (tunnel->parms.o_flags&GRE_SEQ) {
957 *ptr = htonl(tunnel->o_seqno);
960 if (tunnel->parms.o_flags&GRE_KEY) {
961 *ptr = tunnel->parms.o_key;
964 if (tunnel->parms.o_flags&GRE_CSUM) {
966 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
976 dst_link_failure(skb);
984 static int ipgre_tunnel_bind_dev(struct net_device *dev)
986 struct net_device *tdev = NULL;
987 struct ip_tunnel *tunnel;
989 int hlen = LL_MAX_HEADER;
990 int mtu = ETH_DATA_LEN;
991 int addend = sizeof(struct iphdr) + 4;
993 tunnel = netdev_priv(dev);
994 iph = &tunnel->parms.iph;
996 /* Guess output device to choose reasonable mtu and needed_headroom */
999 struct flowi fl = { .oif = tunnel->parms.link,
1001 { .daddr = iph->daddr,
1002 .saddr = iph->saddr,
1003 .tos = RT_TOS(iph->tos) } },
1004 .proto = IPPROTO_GRE };
1006 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
1007 tdev = rt->u.dst.dev;
1011 if (dev->type != ARPHRD_ETHER)
1012 dev->flags |= IFF_POINTOPOINT;
1015 if (!tdev && tunnel->parms.link)
1016 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
1019 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1020 hlen = tdev->hard_header_len + tdev->needed_headroom;
1022 hlen = tdev->hard_header_len;
1026 dev->iflink = tunnel->parms.link;
1028 /* Precalculate GRE options length */
1029 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1030 if (tunnel->parms.o_flags&GRE_CSUM)
1032 if (tunnel->parms.o_flags&GRE_KEY)
1034 if (tunnel->parms.o_flags&GRE_SEQ)
1037 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1038 dev->needed_headroom = hlen + addend;
1039 mtu -= dev->hard_header_len + addend;
1041 dev->hard_header_len = hlen + addend;
1044 tunnel->hlen = addend;
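
	/* Worked example: a tunnel with GRE_CSUM|GRE_KEY set has
	 * addend = 24 (outer IP + base GRE) + 4 (csum) + 4 (key) = 32,
	 * so an ETH_DATA_LEN path leaves at most 1500 - 32 = 1468 bytes
	 * of tunnel MTU before link-layer headroom is subtracted. */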
/* XXX: Set MTU to the maximum possible value. If we are bridged to a
* device with a larger MTU, packets will be dropped. */
1057 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1060 struct ip_tunnel_parm p;
1061 struct ip_tunnel *t;
1062 struct net *net = dev_net(dev);
1063 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1064 int add_tunnel, gretap;
1069 if (dev == ign->fb_tunnel_dev) {
1070 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1074 t = ipgre_tunnel_locate(net, &p, false, 0);
1077 t = netdev_priv(dev);
1078 memcpy(&p, &t->parms, sizeof(p));
1079 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1088 if (!capable(CAP_NET_ADMIN))
1092 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1096 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1097 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1098 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1101 add_tunnel = (cmd == SIOCADDTUNNEL || cmd == SIOCADDGRETAP);
1102 gretap = (cmd == SIOCADDGRETAP || cmd == SIOCCHGGRETAP);
1105 p.iph.frag_off |= htons(IP_DF);
1107 if (!(p.i_flags&GRE_KEY))
1109 if (!(p.o_flags&GRE_KEY))
1112 t = ipgre_tunnel_locate(net, &p, gretap, add_tunnel);
1114 if (dev != ign->fb_tunnel_dev && !add_tunnel) {
1116 if (t->dev != dev) {
1121 unsigned nflags = 0;
1123 t = netdev_priv(dev);
1125 if (ipv4_is_multicast(p.iph.daddr))
1126 nflags = IFF_BROADCAST;
1127 else if (p.iph.daddr)
1128 nflags = IFF_POINTOPOINT;
1130 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1134 ipgre_tunnel_unlink(ign, t);
1135 t->parms.iph.saddr = p.iph.saddr;
1136 t->parms.iph.daddr = p.iph.daddr;
1137 t->parms.i_key = p.i_key;
1138 t->parms.o_key = p.o_key;
1139 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1140 memcpy(dev->broadcast, &p.iph.daddr, 4);
1141 ipgre_tunnel_link(ign, t);
1142 netdev_state_change(dev);
1149 t->parms.iph.ttl = p.iph.ttl;
1150 t->parms.iph.tos = p.iph.tos;
1151 t->parms.iph.frag_off = p.iph.frag_off;
1152 if (t->parms.link != p.link) {
1153 t->parms.link = p.link;
1154 dev->mtu = ipgre_tunnel_bind_dev(dev);
1155 netdev_state_change(dev);
1158 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1161 err = (add_tunnel ? -ENOBUFS : -ENOENT);
1166 if (!capable(CAP_NET_ADMIN))
1169 if (dev == ign->fb_tunnel_dev) {
1171 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1174 if ((t = ipgre_tunnel_locate(net, &p, false, 0)) == NULL)
1177 if (t == netdev_priv(ign->fb_tunnel_dev))
1181 unregister_netdevice(dev);
1193 #ifndef HAVE_NETDEV_STATS
1194 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1196 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1200 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1202 struct ip_tunnel *tunnel = netdev_priv(dev);
1204 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1205 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1207 new_mtu > 0xFFF8 - tunnel->hlen)
/* Nice toy. Unfortunately, useless in real life :-)
It allows one to construct a virtual multiprotocol broadcast "LAN"
over the Internet, provided multicast routing is tuned.

I have no idea whether this bicycle was invented before me,
so I had to set ARPHRD_IPGRE to a random value.
I have the impression that Cisco could make something similar,
but this feature is apparently missing in IOS<=11.2(8).

I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

ping -t 255 224.66.66.66

If nobody answers, the mbone does not work.
1231 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1232 ip addr add 10.66.66.<somewhat>/24 dev Universe
1233 ifconfig Universe up
1234 ifconfig Universe add fe80::<Your_real_addr>/10
1235 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1238 ftp fec0:6666:6666::193.233.7.65
1243 #ifdef HAVE_NETDEV_HEADER_OPS
1244 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1245 unsigned short type,
1246 const void *daddr, const void *saddr, unsigned len)
1248 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1249 void *daddr, void *saddr, unsigned len)
1252 struct ip_tunnel *t = netdev_priv(dev);
1253 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1254 __be16 *p = (__be16*)(iph+1);
1256 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1257 p[0] = t->parms.o_flags;
1261 * Set the source hardware address.
1265 memcpy(&iph->saddr, saddr, 4);
1268 memcpy(&iph->daddr, daddr, 4);
1271 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1277 #ifdef HAVE_NETDEV_HEADER_OPS
1278 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1280 static int ipgre_header_parse(struct sk_buff *skb, unsigned char *haddr)
1283 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1284 memcpy(haddr, &iph->saddr, 4);
1288 #ifdef HAVE_NETDEV_HEADER_OPS
1289 static const struct header_ops ipgre_header_ops = {
1290 .create = ipgre_header,
1291 .parse = ipgre_header_parse,
1295 #ifdef CONFIG_NET_IPGRE_BROADCAST
1296 static int ipgre_open(struct net_device *dev)
1298 struct ip_tunnel *t = netdev_priv(dev);
1300 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1301 struct flowi fl = { .oif = t->parms.link,
1303 { .daddr = t->parms.iph.daddr,
1304 .saddr = t->parms.iph.saddr,
1305 .tos = RT_TOS(t->parms.iph.tos) } },
1306 .proto = IPPROTO_GRE };
1308 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1309 return -EADDRNOTAVAIL;
1310 dev = rt->u.dst.dev;
1312 if (__in_dev_get_rtnl(dev) == NULL)
1313 return -EADDRNOTAVAIL;
1314 t->mlink = dev->ifindex;
1315 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1320 static int ipgre_close(struct net_device *dev)
1322 struct ip_tunnel *t = netdev_priv(dev);
1324 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1325 struct in_device *in_dev;
1326 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1328 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1337 static void ethtool_getinfo(struct net_device *dev,
1338 struct ethtool_drvinfo *info)
1340 strcpy(info->driver, "ip_gre");
1341 strcpy(info->version, "Open vSwitch "VERSION BUILDNR);
1342 strcpy(info->bus_info, dev->type == ARPHRD_ETHER ? "gretap" : "gre");
1345 static struct ethtool_ops ethtool_ops = {
1346 .get_drvinfo = ethtool_getinfo,
1349 #ifdef HAVE_NET_DEVICE_OPS
1350 static const struct net_device_ops ipgre_netdev_ops = {
1351 .ndo_init = ipgre_tunnel_init,
1352 .ndo_uninit = ipgre_tunnel_uninit,
1353 #ifdef CONFIG_NET_IPGRE_BROADCAST
1354 .ndo_open = ipgre_open,
1355 .ndo_stop = ipgre_close,
1357 .ndo_start_xmit = ipgre_tunnel_xmit,
1358 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1359 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1363 static void ipgre_tunnel_setup(struct net_device *dev)
1365 #ifdef HAVE_NET_DEVICE_OPS
1366 dev->netdev_ops = &ipgre_netdev_ops;
1368 dev->init = ipgre_tunnel_init;
1369 dev->uninit = ipgre_tunnel_uninit;
1370 dev->hard_start_xmit = ipgre_tunnel_xmit;
1371 #ifndef HAVE_NETDEV_STATS
1372 dev->get_stats = ipgre_tunnel_get_stats;
1374 dev->do_ioctl = ipgre_tunnel_ioctl;
1375 dev->change_mtu = ipgre_tunnel_change_mtu;
1376 #endif /* HAVE_NET_DEVICE_OPS */
1377 dev->destructor = free_netdev;
1379 dev->type = ARPHRD_IPGRE;
1380 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1381 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1383 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1385 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1386 dev->flags = IFF_NOARP;
1389 dev->features |= NETIF_F_NETNS_LOCAL;
1390 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
SET_ETHTOOL_OPS(dev, &ethtool_ops);
1395 static int ipgre_tunnel_init(struct net_device *dev)
1397 struct ip_tunnel *tunnel;
1400 tunnel = netdev_priv(dev);
1401 iph = &tunnel->parms.iph;
1404 strcpy(tunnel->parms.name, dev->name);
1406 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1407 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1410 #ifdef CONFIG_NET_IPGRE_BROADCAST
1411 if (ipv4_is_multicast(iph->daddr)) {
1414 dev->flags = IFF_BROADCAST;
1415 #ifdef HAVE_NETDEV_HEADER_OPS
1416 dev->header_ops = &ipgre_header_ops;
1418 dev->hard_header = ipgre_header;
1419 dev->hard_header_parse = ipgre_header_parse;
1421 #ifndef HAVE_NET_DEVICE_OPS
1422 dev->open = ipgre_open;
1423 dev->stop = ipgre_close;
1428 #ifdef HAVE_NETDEV_HEADER_OPS
1429 dev->header_ops = &ipgre_header_ops;
1431 dev->hard_header = ipgre_header;
1432 dev->hard_header_parse = ipgre_header_parse;
1439 #ifdef HAVE_NET_DEVICE_OPS
1440 static void ipgre_fb_tunnel_init(struct net_device *dev)
1442 static int ipgre_fb_tunnel_init(struct net_device *dev)
1445 struct ip_tunnel *tunnel = netdev_priv(dev);
1446 struct iphdr *iph = &tunnel->parms.iph;
1447 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1450 strcpy(tunnel->parms.name, dev->name);
1453 iph->protocol = IPPROTO_GRE;
1455 tunnel->hlen = sizeof(struct iphdr) + 4;
1458 ign->tunnels_wc[0] = tunnel;
1460 #ifndef HAVE_NET_DEVICE_OPS
1465 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
1466 static struct net_protocol ipgre_protocol = {
1468 static const struct net_protocol ipgre_protocol = {
1470 .handler = ipgre_rcv,
1471 .err_handler = ipgre_err,
1472 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
1477 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1481 for (prio = 0; prio < 4; prio++) {
1483 for (h = 0; h < HASH_SIZE; h++) {
1484 struct ip_tunnel *t = ign->tunnels[prio][h];
1487 unregister_netdevice_queue(t->dev, head);
1494 static int ipgre_init_net(struct net *net)
1496 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1499 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), GRE_IOCTL_DEVICE,
1500 ipgre_tunnel_setup);
1501 if (!ign->fb_tunnel_dev) {
1505 dev_net_set(ign->fb_tunnel_dev, net);
1507 #ifdef HAVE_NET_DEVICE_OPS
1508 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1510 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1512 #ifndef GRE_IOCTL_ONLY
1513 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1516 if ((err = register_netdev(ign->fb_tunnel_dev)))
1522 free_netdev(ign->fb_tunnel_dev);
1527 static void ipgre_exit_net(struct net *net)
1529 struct ipgre_net *ign;
1532 ign = net_generic(net, ipgre_net_id);
1534 ipgre_destroy_tunnels(ign, &list);
1535 unregister_netdevice_many(&list);
1539 static struct pernet_operations ipgre_net_ops = {
1540 .init = ipgre_init_net,
1541 .exit = ipgre_exit_net,
1542 .id = &ipgre_net_id,
1543 .size = sizeof(struct ipgre_net),
1546 static int ipgre_tap_init(struct net_device *dev)
1548 struct ip_tunnel *tunnel;
1550 tunnel = netdev_priv(dev);
1553 strcpy(tunnel->parms.name, dev->name);
1555 ipgre_tunnel_bind_dev(dev);
1560 #ifdef HAVE_NET_DEVICE_OPS
1561 static const struct net_device_ops ipgre_tap_netdev_ops = {
1562 .ndo_init = ipgre_tap_init,
1563 .ndo_uninit = ipgre_tunnel_uninit,
1564 .ndo_start_xmit = ipgre_tunnel_xmit,
1565 .ndo_set_mac_address = eth_mac_addr,
1566 .ndo_validate_addr = eth_validate_addr,
1567 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1568 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1572 static void ipgre_tap_setup(struct net_device *dev)
1576 #ifdef HAVE_NET_DEVICE_OPS
1577 dev->netdev_ops = &ipgre_tap_netdev_ops;
1579 dev->init = ipgre_tap_init;
1580 dev->uninit = ipgre_tunnel_uninit;
1581 dev->hard_start_xmit = ipgre_tunnel_xmit;
1582 #ifndef HAVE_NETDEV_STATS
1583 dev->get_stats = ipgre_tunnel_get_stats;
1585 dev->do_ioctl = ipgre_tunnel_ioctl;
1586 dev->change_mtu = ipgre_tunnel_change_mtu;
1587 #endif /* HAVE_NET_DEVICE_OPS */
1588 dev->destructor = free_netdev;
1591 dev->features |= NETIF_F_NETNS_LOCAL;
1592 dev->tx_queue_len = 0;
SET_ETHTOOL_OPS(dev, &ethtool_ops);
1597 #ifndef GRE_IOCTL_ONLY
1598 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1606 if (data[IFLA_GRE_IFLAGS])
1607 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1608 if (data[IFLA_GRE_OFLAGS])
1609 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1610 if (flags & (GRE_VERSION|GRE_ROUTING))
1616 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1620 if (tb[IFLA_ADDRESS]) {
1621 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1623 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1624 return -EADDRNOTAVAIL;
1630 if (data[IFLA_GRE_REMOTE]) {
1631 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1637 return ipgre_tunnel_validate(tb, data);
1640 static void ipgre_netlink_parms(struct nlattr *data[],
1641 struct ip_tunnel_parm *parms)
1643 memset(parms, 0, sizeof(*parms));
1645 parms->iph.protocol = IPPROTO_GRE;
1650 if (data[IFLA_GRE_LINK])
1651 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1653 if (data[IFLA_GRE_IFLAGS])
1654 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1656 if (data[IFLA_GRE_OFLAGS])
1657 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1659 if (data[IFLA_GRE_IKEY])
1660 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1662 if (data[IFLA_GRE_OKEY])
1663 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1665 if (data[IFLA_GRE_LOCAL])
1666 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1668 if (data[IFLA_GRE_REMOTE])
1669 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1671 if (data[IFLA_GRE_TTL])
1672 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1674 if (data[IFLA_GRE_TOS])
1675 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1677 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1678 parms->iph.frag_off = htons(IP_DF);
1681 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)
1682 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1683 struct nlattr *data[])
1685 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1686 struct nlattr *data[])
1689 struct ip_tunnel *nt;
1690 struct net *net = dev_net(dev);
1691 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1695 nt = netdev_priv(dev);
1696 ipgre_netlink_parms(data, &nt->parms);
1698 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1701 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1702 random_ether_addr(dev->dev_addr);
1704 mtu = ipgre_tunnel_bind_dev(dev);
1708 err = register_netdevice(dev);
1713 ipgre_tunnel_link(ign, nt);
1719 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1720 struct nlattr *data[])
1722 struct ip_tunnel *t, *nt;
1723 struct net *net = dev_net(dev);
1724 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1725 struct ip_tunnel_parm p;
1728 if (dev == ign->fb_tunnel_dev)
1731 nt = netdev_priv(dev);
1732 ipgre_netlink_parms(data, &p);
1734 t = ipgre_tunnel_locate(net, &p, false, 0);
1742 if (dev->type != ARPHRD_ETHER) {
1743 unsigned nflags = 0;
1745 if (ipv4_is_multicast(p.iph.daddr))
1746 nflags = IFF_BROADCAST;
1747 else if (p.iph.daddr)
1748 nflags = IFF_POINTOPOINT;
1750 if ((dev->flags ^ nflags) &
1751 (IFF_POINTOPOINT | IFF_BROADCAST))
1755 ipgre_tunnel_unlink(ign, t);
1756 t->parms.iph.saddr = p.iph.saddr;
1757 t->parms.iph.daddr = p.iph.daddr;
1758 t->parms.i_key = p.i_key;
1759 if (dev->type != ARPHRD_ETHER) {
1760 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1761 memcpy(dev->broadcast, &p.iph.daddr, 4);
1763 ipgre_tunnel_link(ign, t);
1764 netdev_state_change(dev);
1767 t->parms.o_key = p.o_key;
1768 t->parms.iph.ttl = p.iph.ttl;
1769 t->parms.iph.tos = p.iph.tos;
1770 t->parms.iph.frag_off = p.iph.frag_off;
1772 if (t->parms.link != p.link) {
1773 t->parms.link = p.link;
1774 mtu = ipgre_tunnel_bind_dev(dev);
1777 netdev_state_change(dev);
1783 static size_t ipgre_get_size(const struct net_device *dev)
1788 /* IFLA_GRE_IFLAGS */
1790 /* IFLA_GRE_OFLAGS */
1796 /* IFLA_GRE_LOCAL */
1798 /* IFLA_GRE_REMOTE */
1804 /* IFLA_GRE_PMTUDISC */
1809 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1811 struct ip_tunnel *t = netdev_priv(dev);
1812 struct ip_tunnel_parm *p = &t->parms;
1814 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1815 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1816 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1817 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1818 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1819 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1820 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1821 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1822 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1823 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1831 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1832 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1833 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1834 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1835 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1836 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1837 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1838 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1839 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1840 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1841 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1844 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1846 .maxtype = IFLA_GRE_MAX,
1847 .policy = ipgre_policy,
1848 .priv_size = sizeof(struct ip_tunnel),
1849 .setup = ipgre_tunnel_setup,
1850 .validate = ipgre_tunnel_validate,
1851 .newlink = ipgre_newlink,
1852 .changelink = ipgre_changelink,
1853 .get_size = ipgre_get_size,
1854 .fill_info = ipgre_fill_info,
1857 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1859 .maxtype = IFLA_GRE_MAX,
1860 .policy = ipgre_policy,
1861 .priv_size = sizeof(struct ip_tunnel),
1862 .setup = ipgre_tap_setup,
1863 .validate = ipgre_tap_validate,
1864 .newlink = ipgre_newlink,
1865 .changelink = ipgre_changelink,
1866 .get_size = ipgre_get_size,
1867 .fill_info = ipgre_fill_info,
* And now the module code and kernel interface.
1875 static int __init ipgre_init(void)
1879 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1881 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1882 printk(KERN_INFO "ipgre init: can't add protocol\n");
1886 err = register_pernet_device(&ipgre_net_ops);
1888 goto gen_device_failed;
1890 #ifndef GRE_IOCTL_ONLY
1891 err = rtnl_link_register(&ipgre_link_ops);
1893 goto rtnl_link_failed;
1895 err = rtnl_link_register(&ipgre_tap_ops);
1897 goto tap_ops_failed;
1903 #ifndef GRE_IOCTL_ONLY
1905 rtnl_link_unregister(&ipgre_link_ops);
1907 unregister_pernet_device(&ipgre_net_ops);
1910 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1915 static void __exit ipgre_fini(void)
1917 #ifndef GRE_IOCTL_ONLY
1918 rtnl_link_unregister(&ipgre_tap_ops);
1919 rtnl_link_unregister(&ipgre_link_ops);
1921 unregister_pernet_device(&ipgre_net_ops);
1922 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1923 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1926 module_init(ipgre_init);
1927 module_exit(ipgre_fini);
1928 MODULE_DESCRIPTION("GRE over IPv4 tunneling driver");
1929 MODULE_LICENSE("GPL");
1930 #ifndef GRE_IOCTL_ONLY
1931 MODULE_ALIAS_RTNL_LINK("gre");
1932 MODULE_ALIAS_RTNL_LINK("gretap");