1 /* ip_gre driver port to Linux 2.6.18 and greater */
3 #include <linux/version.h>
4 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
5 #define HAVE_NETDEV_STATS
7 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
8 #define HAVE_NETDEV_HEADER_OPS
10 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
11 #define HAVE_NETDEV_NEEDED_HEADROOM
15 * Linux NET3: GRE over IP protocol decoder.
17 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
26 #include <linux/capability.h>
27 #include <linux/ethtool.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <asm/uaccess.h>
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
35 #include <linux/tcp.h>
36 #include <linux/udp.h>
37 #include <linux/if_arp.h>
38 #include <linux/mroute.h>
39 #include <linux/init.h>
40 #include <linux/in6.h>
41 #include <linux/inetdevice.h>
42 #include <linux/igmp.h>
43 #include <linux/netfilter_ipv4.h>
44 #include <linux/etherdevice.h>
45 #include <linux/if_ether.h>
50 #include <net/protocol.h>
53 #include <net/checksum.h>
54 #include <net/dsfield.h>
55 #include <net/inet_ecn.h>
57 #include <net/net_namespace.h>
58 #include <net/netns/generic.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
67 #include "openvswitch/gre.h"
69 #ifndef GRE_IOCTL_ONLY
70 #include <net/rtnetlink.h>
77 1. The most important issue is detecting local dead loops.
78 They would cause complete host lockup in transmit, which
79 would be "resolved" by stack overflow or, if queueing is enabled,
80 with infinite looping in net_bh.
82 We cannot track such dead loops during route installation,
83 it is infeasible task. The most general solutions would be
84 to keep skb->encapsulation counter (sort of local ttl),
85 and silently drop packet when it expires. It is the best
86 solution, but it supposes maintaining a new variable in ALL
87 skb, even if no tunneling is used.
89 Current solution: HARD_TX_LOCK lock breaks dead loops.
93 2. Networking dead loops would not kill routers, but would really
94 kill network. IP hop limit plays role of "t->recursion" in this case,
95 if we copy it from packet being encapsulated to upper header.
96 It is very good solution, but it introduces two problems:
98 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
99 do not work over tunnels.
100 - traceroute does not work. I planned to relay ICMP from tunnel,
101 so that this problem would be solved and traceroute output
102 would even more informative. This idea appeared to be wrong:
103 only Linux complies to rfc1812 now (yes, guys, Linux is the only
104 true router now :-)), all routers (at least, in neighbourhood of mine)
105 return only 8 bytes of payload. It is the end.
107 Hence, if we want that OSPF worked or traceroute said something reasonable,
108 we should search for another solution.
110 One of them is to parse packet trying to detect inner encapsulation
111 made by our node. It is difficult or even impossible, especially,
112 taking into account fragmentation. To be short, it is not a solution at all.
114 Current solution: The solution was UNEXPECTEDLY SIMPLE.
115 We force DF flag on tunnels with preconfigured hop limit,
116 that is ALL. :-) Well, it does not remove the problem completely,
117 but exponential growth of network traffic is changed to linear
118 (branches, that exceed pmtu are pruned) and tunnel mtu
119 quickly degrades to a value <68, where looping stops.
120 Yes, it is not good if there exists a router in the loop,
121 which does not force DF, even when encapsulating packets have DF set.
122 But it is not our problem! Nobody could accuse us, we made
123 all that we could make. Even if it is your gated who injected
124 fatal route to network, even if it were you who configured
125 fatal static route: you are innocent. :-)
127 XXX: Forcing the DF flag on was done only when setting up tunnels via the
128 ioctl interface and not Netlink. Since it prevents some operations
129 and isn't very transparent I removed it. It seems nobody really
130 cared about it anyways.
131 Moral: don't create loops.
133 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
134 practically identical code. It would be good to glue them
135 together, but it is not very evident, how to make them modular.
136 sit is integral part of IPv6, ipip and gre are naturally modular.
137 We could extract common parts (hash table, ioctl etc)
138 to a separate module (ip_tunnel.c).
143 #ifndef GRE_IOCTL_ONLY
144 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
145 static struct rtnl_link_ops ipgre_tap_ops __read_mostly;
147 static int ipgre_tunnel_init(struct net_device *dev);
148 static void ipgre_tunnel_setup(struct net_device *dev);
149 static void ipgre_tap_setup(struct net_device *dev);
150 static int ipgre_tunnel_bind_dev(struct net_device *dev);
154 static int ipgre_net_id __read_mostly;
156 struct ip_tunnel *tunnels[4][HASH_SIZE];
158 struct net_device *fb_tunnel_dev;
161 /* Tunnel hash table */
171 We require exact key match i.e. if a key is present in packet
172 it will match only tunnel with the same key; if it is not present,
173 it will match only keyless tunnel.
175 All keyless packets, if not matching configured keyless tunnels,
176 will match fallback tunnel.
179 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
181 #define tunnels_r_l tunnels[3]
182 #define tunnels_r tunnels[2]
183 #define tunnels_l tunnels[1]
184 #define tunnels_wc tunnels[0]
186 * Locking : hash tables are protected by RCU and a spinlock
188 static DEFINE_SPINLOCK(ipgre_lock);
190 #define for_each_ip_tunnel_rcu(start) \
191 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
193 /* Given src, dst and key, find appropriate for input tunnel. */
195 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
196 __be32 remote, __be32 local,
197 __be32 key, __be16 gre_proto)
199 struct net *net = dev_net(dev);
200 int link = dev->ifindex;
201 unsigned h0 = HASH(remote);
202 unsigned h1 = HASH(key);
203 struct ip_tunnel *t, *cand = NULL;
204 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
205 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
206 ARPHRD_ETHER : ARPHRD_IPGRE;
207 int score, cand_score = 4;
209 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
210 if (local != t->parms.iph.saddr ||
211 remote != t->parms.iph.daddr ||
212 key != t->parms.i_key ||
213 !(t->dev->flags & IFF_UP))
216 if (t->dev->type != ARPHRD_IPGRE &&
217 t->dev->type != dev_type)
221 if (t->parms.link != link)
223 if (t->dev->type != dev_type)
228 if (score < cand_score) {
234 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
235 if (remote != t->parms.iph.daddr ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
245 if (t->parms.link != link)
247 if (t->dev->type != dev_type)
252 if (score < cand_score) {
258 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
259 if ((local != t->parms.iph.saddr &&
260 (local != t->parms.iph.daddr ||
261 !ipv4_is_multicast(local))) ||
262 key != t->parms.i_key ||
263 !(t->dev->flags & IFF_UP))
266 if (t->dev->type != ARPHRD_IPGRE &&
267 t->dev->type != dev_type)
271 if (t->parms.link != link)
273 if (t->dev->type != dev_type)
278 if (score < cand_score) {
284 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
285 if (t->parms.i_key != key ||
286 !(t->dev->flags & IFF_UP))
289 if (t->dev->type != ARPHRD_IPGRE &&
290 t->dev->type != dev_type)
294 if (t->parms.link != link)
296 if (t->dev->type != dev_type)
301 if (score < cand_score) {
310 dev = ign->fb_tunnel_dev;
311 if (dev->flags & IFF_UP)
312 return netdev_priv(dev);
317 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
318 struct ip_tunnel_parm *parms)
320 __be32 remote = parms->iph.daddr;
321 __be32 local = parms->iph.saddr;
322 __be32 key = parms->i_key;
323 unsigned h = HASH(key);
328 if (remote && !ipv4_is_multicast(remote)) {
333 return &ign->tunnels[prio][h];
/* Convenience wrapper: bucket for an existing tunnel's own parameters. */
336 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
339 return __ipgre_bucket(ign, &t->parms);
/* Insert tunnel t at the head of its hash chain.  Writers serialize on
 * ipgre_lock; readers traverse the chain under RCU, hence the
 * rcu_assign_pointer() publish. */
342 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
344 struct ip_tunnel **tp = ipgre_bucket(ign, t);
346 spin_lock_bh(&ipgre_lock);
348 rcu_assign_pointer(*tp, t);
349 spin_unlock_bh(&ipgre_lock);
/* Walk t's hash chain and unlink it under ipgre_lock.
 * NOTE(review): the comparison/unlink statements inside the loop are missing
 * from this extraction — confirm against the full source. */
352 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
354 struct ip_tunnel **tp;
356 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
358 spin_lock_bh(&ipgre_lock);
360 spin_unlock_bh(&ipgre_lock);
/* Configuration-side lookup: find a tunnel whose parameters (addresses, key,
 * link) and device type exactly match.  Unlike ipgre_tunnel_lookup(), this
 * demands an exact match and is used when creating/changing tunnels. */
366 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
367 struct ip_tunnel_parm *parms,
370 __be32 remote = parms->iph.daddr;
371 __be32 local = parms->iph.saddr;
372 __be32 key = parms->i_key;
373 int link = parms->link;
374 struct ip_tunnel *t, **tp;
375 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
377 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
378 if (local == t->parms.iph.saddr &&
379 remote == t->parms.iph.daddr &&
380 key == t->parms.i_key &&
381 link == t->parms.link &&
382 type == t->dev->type)
/* Find an existing tunnel matching parms, or (when 'create' is set) allocate,
 * name, register and link a new net_device for it.  'gretap' selects the
 * Ethernet-style setup (ipgre_tap_setup) over the plain GRE one.
 * NOTE(review): error-unwind lines (free_netdev on failure, returns) are
 * missing from this extraction — verify before editing. */
388 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
389 struct ip_tunnel_parm *parms, int gretap, int create)
391 struct ip_tunnel *t, *nt;
392 struct net_device *dev;
394 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
396 t = ipgre_tunnel_find(net, parms, gretap ? ARPHRD_ETHER : ARPHRD_IPGRE);
401 strlcpy(name, parms->name, IFNAMSIZ);
/* No name given: fall back to the "gre%d" auto-numbering template. */
403 sprintf(name, "gre%%d");
405 dev = alloc_netdev(sizeof(*t), name, gretap ? ipgre_tap_setup
406 : ipgre_tunnel_setup);
410 dev_net_set(dev, net);
/* Resolve a "%d"-style template to a concrete free name. */
412 if (strchr(name, '%')) {
413 if (dev_alloc_name(dev, name) < 0)
418 random_ether_addr(dev->dev_addr);
420 #ifndef GRE_IOCTL_ONLY
421 dev->rtnl_link_ops = gretap ? &ipgre_tap_ops : &ipgre_link_ops;
423 nt = netdev_priv(dev);
426 dev->mtu = ipgre_tunnel_bind_dev(dev);
428 if (register_netdevice(dev) < 0)
432 ipgre_tunnel_link(ign, nt);
/* ndo_uninit: remove the tunnel from its hash chain when the device goes
 * away (memory is released by the free_netdev destructor). */
440 static void ipgre_tunnel_uninit(struct net_device *dev)
442 struct net *net = dev_net(dev);
443 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
445 ipgre_tunnel_unlink(ign, netdev_priv(dev));
/* ICMP error handler for the GRE protocol: parse the returned (truncated)
 * inner headers, locate the originating tunnel and rate-limit error state
 * updates via t->err_time.
 * NOTE(review): several lines (flags read, switch returns, err_count update)
 * are missing from this extraction — verify against the full source. */
450 static void ipgre_err(struct sk_buff *skb, u32 info)
453 /* All the routers (except for Linux) return only
454 8 bytes of packet payload. It means, that precise relaying of
455 ICMP in the real Internet is absolutely infeasible.
457 Moreover, Cisco "wise men" put GRE key to the third word
458 in GRE header. It makes impossible maintaining even soft state for keyed
459 GRE tunnels with enabled checksum. Tell them "thank you".
461 Well, I wonder, rfc1812 was written by Cisco employee,
462 what the hell these idiots break standards established
466 struct iphdr *iph = (struct iphdr *)skb->data;
467 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
468 int grehlen = (iph->ihl<<2) + 4;
469 const int type = icmp_hdr(skb)->type;
470 const int code = icmp_hdr(skb)->code;
/* Need at least the base GRE header in the returned payload. */
474 if (skb_headlen(skb) < grehlen)
478 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
479 if (flags&(GRE_VERSION|GRE_ROUTING))
488 /* If only 8 bytes returned, keyed message will be dropped here */
489 if (skb_headlen(skb) < grehlen)
494 case ICMP_PARAMETERPROB:
497 case ICMP_DEST_UNREACH:
500 case ICMP_PORT_UNREACH:
501 /* Impossible event. */
503 case ICMP_FRAG_NEEDED:
504 /* Soft state for pmtu is maintained by IP core. */
507 /* All others are translated to HOST_UNREACH.
508 rfc2003 contains "deep thoughts" about NET_UNREACH,
509 I believe they are just ether pollution. --ANK
514 case ICMP_TIME_EXCEEDED:
515 if (code != ICMP_EXC_TTL)
/* Note: saddr/daddr are swapped — we are looking at a returned packet. */
521 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
523 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
525 if (t == NULL || t->parms.iph.daddr == 0 ||
526 ipv4_is_multicast(t->parms.iph.daddr))
529 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Rate-limit: only record a new error once per IPTUNNEL_ERR_TIMEO. */
532 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
536 t->err_time = jiffies;
/* Propagate ECN Congestion Experienced from the outer IP header to the inner
 * IPv4/IPv6 header after decapsulation.  Requires pulling enough of the inner
 * header first; the pskb_may_pull() calls may reallocate skb data. */
542 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
544 if (INET_ECN_is_ce(iph->tos)) {
545 if (skb->protocol == htons(ETH_P_IP)) {
546 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
547 + sizeof(struct iphdr) - skb->data)))
550 IP_ECN_set_ce(ip_hdr(skb));
551 } else if (skb->protocol == htons(ETH_P_IPV6)) {
552 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
553 + sizeof(struct ipv6hdr) - skb->data)))
556 IP6_ECN_set_ce(ipv6_hdr(skb));
/* Compute the outer TOS for encapsulation: take the inner packet's
 * DSCP/ECN field (IPv4 tos or IPv6 dsfield) and combine it with the
 * configured tunnel tos via INET_ECN_encapsulate(). */
562 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
565 if (skb->protocol == htons(ETH_P_IP))
566 inner = old_iph->tos;
567 else if (skb->protocol == htons(ETH_P_IPV6))
/* For IPv6, old_iph actually points at the inner IPv6 header. */
568 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
569 return INET_ECN_encapsulate(tos, inner);
/* GRE receive path: validate the GRE header (version/flags), verify the
 * optional checksum/key/sequence fields, look up the owning tunnel, strip the
 * outer headers and hand the inner packet up the stack.  Returns 0 on
 * consume; unmatched packets get an ICMP port-unreachable.
 * NOTE(review): many lines (offset bookkeeping, drop labels, stats updates)
 * are missing from this extraction — verify against the full source. */
572 static int ipgre_rcv(struct sk_buff *skb)
580 struct ip_tunnel *tunnel;
/* 16 = max fixed header we may touch before optional fields. */
585 if (!pskb_may_pull(skb, 16))
592 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
593 /* - Version must be 0.
594 - We do not support routing headers.
596 if (flags&(GRE_VERSION|GRE_ROUTING))
599 if (flags&GRE_CSUM) {
600 switch (skb->ip_summed) {
601 case CHECKSUM_COMPLETE:
602 csum = csum_fold(skb->csum);
608 csum = __skb_checksum_complete(skb);
609 skb->ip_summed = CHECKSUM_COMPLETE;
614 key = *(__be32*)(h + offset);
618 seqno = ntohl(*(__be32*)(h + offset));
623 gre_proto = *(__be16 *)(h + 2);
626 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
627 iph->saddr, iph->daddr, key,
629 struct net_device_stats *stats;
630 #ifdef HAVE_NETDEV_STATS
631 stats = &tunnel->dev->stats;
633 stats = &tunnel->stat;
638 skb->protocol = gre_proto;
639 /* WCCP version 1 and 2 protocol decoding.
640 * - Change protocol to IP
641 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
643 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
644 skb->protocol = htons(ETH_P_IP);
/* 0x40 in the first nibble => inner packet is IPv4 (version 4). */
645 if ((*(h + offset) & 0xF0) != 0x40)
649 skb->mac_header = skb->network_header;
650 __pskb_pull(skb, offset);
651 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
652 skb->pkt_type = PACKET_HOST;
653 #ifdef CONFIG_NET_IPGRE_BROADCAST
654 if (ipv4_is_multicast(iph->daddr)) {
655 /* Looped back packet, drop it! */
656 if (skb_rtable(skb)->fl.iif == 0)
659 skb->pkt_type = PACKET_BROADCAST;
/* Checksum present-but-bad, or required-but-absent => rx_crc error. */
663 if (((flags&GRE_CSUM) && csum) ||
664 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
665 stats->rx_crc_errors++;
669 if (tunnel->parms.i_flags&GRE_SEQ) {
/* Out-of-order or missing sequence number => counted as fifo error. */
670 if (!(flags&GRE_SEQ) ||
671 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
672 stats->rx_fifo_errors++;
676 tunnel->i_seqno = seqno + 1;
681 /* Warning: All skb pointers will be invalidated! */
682 if (tunnel->dev->type == ARPHRD_ETHER) {
683 if (!pskb_may_pull(skb, ETH_HLEN)) {
684 stats->rx_length_errors++;
690 skb->protocol = eth_type_trans(skb, tunnel->dev);
691 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
695 stats->rx_bytes += len;
696 skb->dev = tunnel->dev;
700 skb_reset_network_header(skb);
702 /* Invalidates pointers. */
703 ipgre_ecn_decapsulate(iph, skb);
/* No tunnel matched: tell the sender per RFC (port unreachable). */
709 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* GRE transmit path: resolve the tunnel destination (possibly NBMA via the
 * inner route/neighbour), route the outer packet, handle PMTU for inner
 * IPv4/IPv6, make headroom, then build the outer IP + GRE header (with
 * optional seq/key/csum fields) and send.
 * NOTE(review): this extraction is missing many lines (error labels, the
 * ip-send macro at the end, stats increments) — verify against the full
 * source before editing. */
718 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
720 struct ip_tunnel *tunnel = netdev_priv(dev);
721 struct net_device_stats *stats;
722 #ifdef HAVE_NETDEV_QUEUE_STATS
723 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
725 struct iphdr *old_iph = ip_hdr(skb);
729 struct rtable *rt; /* Route to the other host */
730 struct net_device *tdev; /* Device to other host */
731 struct iphdr *iph; /* Our new IP header */
732 unsigned int max_headroom; /* The extra header space needed */
736 u8 original_protocol;
738 #ifdef HAVE_NETDEV_STATS
741 stats = &tunnel->stat;
744 /* Validate the protocol headers before we try to use them. */
745 original_protocol = skb->protocol;
746 if (skb->protocol == htons(ETH_P_IP)) {
747 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
748 + sizeof(struct iphdr) - skb->data)))
750 } else if (skb->protocol == htons(ETH_P_IPV6)) {
751 if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
752 + sizeof(struct ipv6hdr) - skb->data)))
756 if (dev->type == ARPHRD_ETHER)
757 IPCB(skb)->flags = 0;
759 #ifdef HAVE_NETDEV_HEADER_OPS
/* Header already built by ipgre_header() (dev_queue_xmit path). */
760 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
762 if (dev->hard_header && dev->type == ARPHRD_IPGRE) {
765 tiph = (struct iphdr *)skb->data;
767 gre_hlen = tunnel->hlen;
768 tiph = &tunnel->parms.iph;
/* NBMA tunnel: no configured destination; derive it from the inner
 * route's gateway (IPv4) or the neighbour entry (IPv6). */
771 if ((dst = tiph->daddr) == 0) {
774 if (skb_dst(skb) == NULL) {
775 stats->tx_fifo_errors++;
779 if (skb->protocol == htons(ETH_P_IP)) {
780 rt = skb_rtable(skb);
781 if ((dst = rt->rt_gateway) == 0)
785 else if (skb->protocol == htons(ETH_P_IPV6)) {
786 struct in6_addr *addr6;
788 struct neighbour *neigh = skb_dst(skb)->neighbour;
793 addr6 = (struct in6_addr *)&neigh->primary_key;
794 addr_type = ipv6_addr_type(addr6);
796 if (addr_type == IPV6_ADDR_ANY) {
797 addr6 = &ipv6_hdr(skb)->daddr;
798 addr_type = ipv6_addr_type(addr6);
/* Only IPv4-compatible IPv6 addresses can supply an IPv4 dst. */
801 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
804 dst = addr6->s6_addr32[3];
814 if (skb->protocol == htons(ETH_P_IP))
816 else if (skb->protocol == htons(ETH_P_IPV6))
817 tos = ipv6_get_dsfield(ipv6_hdr(skb));
/* Route the outer packet. */
821 struct flowi fl = { .oif = tunnel->parms.link,
824 .saddr = tiph->saddr,
825 .tos = RT_TOS(tos) } },
826 .proto = IPPROTO_GRE };
827 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
828 stats->tx_carrier_errors++;
832 tdev = rt->u.dst.dev;
842 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
843 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
845 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
848 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
851 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
853 /* XXX: Temporarily allow fragmentation since DF doesn't
854 * do the right thing with bridging. */
856 if (skb->protocol == htons(ETH_P_IP)) {
857 df |= (old_iph->frag_off&htons(IP_DF));
/* Inner IPv4 with DF set and too big: send FRAG_NEEDED back. */
859 if ((old_iph->frag_off&htons(IP_DF)) &&
860 mtu < ntohs(old_iph->tot_len)) {
861 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
867 else if (skb->protocol == htons(ETH_P_IPV6)) {
868 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
/* Clamp the cached IPv6 route MTU when the tunnel path is narrower. */
870 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
871 if ((tunnel->parms.iph.daddr &&
872 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
873 rt6->rt6i_dst.plen == 128) {
874 rt6->rt6i_flags |= RTF_MODIFIED;
875 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
879 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
880 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
/* Rate-limited link-failure reporting, mirroring ipgre_err(). */
887 if (tunnel->err_count > 0) {
888 if (time_before(jiffies,
889 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
892 dst_link_failure(skb);
894 tunnel->err_count = 0;
897 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
/* Reallocate if there is not enough headroom or skb is shared/cloned. */
899 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
900 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
901 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
904 #ifdef HAVE_NETDEV_QUEUE_STATS
913 skb_set_owner_w(new_skb, skb->sk);
916 old_iph = ip_hdr(skb);
919 skb_reset_transport_header(skb);
920 skb_push(skb, gre_hlen);
921 skb_reset_network_header(skb);
922 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
923 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
926 skb_dst_set(skb, &rt->u.dst);
929 * Push down and install the IPIP header.
934 iph->ihl = sizeof(struct iphdr) >> 2;
936 iph->protocol = IPPROTO_GRE;
937 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
938 iph->daddr = rt->rt_dst;
939 iph->saddr = rt->rt_src;
/* Outer TTL: configured value, else inherit from the inner packet,
 * else fall back to the route's hop-limit metric. */
941 if ((iph->ttl = tiph->ttl) == 0) {
942 if (skb->protocol == htons(ETH_P_IP))
943 iph->ttl = old_iph->ttl;
945 else if (skb->protocol == htons(ETH_P_IPV6))
946 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
949 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
952 skb->protocol = original_protocol;
/* GRE header: flags word, then protocol (TEB for Ethernet payloads). */
954 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
955 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
956 htons(ETH_P_TEB) : skb->protocol;
958 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
959 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
961 if (tunnel->parms.o_flags&GRE_SEQ) {
963 *ptr = htonl(tunnel->o_seqno);
966 if (tunnel->parms.o_flags&GRE_KEY) {
967 *ptr = tunnel->parms.o_key;
970 if (tunnel->parms.o_flags&GRE_CSUM) {
972 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
982 dst_link_failure(skb);
/* Bind the tunnel to its likely underlying device: route the configured
 * destination to guess the output device, then derive headroom, hlen (outer
 * IP + GRE options) and a reasonable MTU.  Returns the computed MTU.
 * NOTE(review): some lines (addend increments, ip_rt_put, return) are
 * missing from this extraction — verify against the full source. */
990 static int ipgre_tunnel_bind_dev(struct net_device *dev)
992 struct net_device *tdev = NULL;
993 struct ip_tunnel *tunnel;
995 int hlen = LL_MAX_HEADER;
996 int mtu = ETH_DATA_LEN;
/* Base overhead: outer IP header + 4-byte base GRE header. */
997 int addend = sizeof(struct iphdr) + 4;
999 tunnel = netdev_priv(dev);
1000 iph = &tunnel->parms.iph;
1002 /* Guess output device to choose reasonable mtu and needed_headroom */
1005 struct flowi fl = { .oif = tunnel->parms.link,
1007 { .daddr = iph->daddr,
1008 .saddr = iph->saddr,
1009 .tos = RT_TOS(iph->tos) } },
1010 .proto = IPPROTO_GRE };
1012 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
1013 tdev = rt->u.dst.dev;
1017 if (dev->type != ARPHRD_ETHER)
1018 dev->flags |= IFF_POINTOPOINT;
/* No route: fall back to the explicitly configured link device. */
1021 if (!tdev && tunnel->parms.link)
1022 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
1025 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1026 hlen = tdev->hard_header_len + tdev->needed_headroom;
1028 hlen = tdev->hard_header_len;
1032 dev->iflink = tunnel->parms.link;
1034 /* Precalculate GRE options length */
1035 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1036 if (tunnel->parms.o_flags&GRE_CSUM)
1038 if (tunnel->parms.o_flags&GRE_KEY)
1040 if (tunnel->parms.o_flags&GRE_SEQ)
1043 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1044 dev->needed_headroom = hlen + addend;
1045 mtu -= dev->hard_header_len + addend;
1047 dev->hard_header_len = hlen + addend;
1050 tunnel->hlen = addend;
1055 /* XXX: Set MTU to the maximum possible value. If we are bridged to a
1056 * device with a larger MTU then packets will be dropped. */
/* ioctl handler for SIOCGETTUNNEL / SIOCADDTUNNEL / SIOCCHGTUNNEL /
 * SIOCDELTUNNEL plus the gretap variants.  Copies ip_tunnel_parm to/from
 * userspace, validates it, and creates/changes/deletes tunnels accordingly.
 * Requires CAP_NET_ADMIN for mutating operations.
 * NOTE(review): case labels, "done:" label and several error paths are
 * missing from this extraction — verify against the full source. */
1063 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1066 struct ip_tunnel_parm p;
1067 struct ip_tunnel *t;
1068 struct net *net = dev_net(dev);
1069 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1070 int add_tunnel, gretap;
/* GET on the fallback device looks up by the userspace-supplied parms;
 * GET on a real tunnel just returns its own parms. */
1075 if (dev == ign->fb_tunnel_dev) {
1076 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1080 t = ipgre_tunnel_locate(net, &p, false, 0);
1083 t = netdev_priv(dev);
1084 memcpy(&p, &t->parms, sizeof(p));
1085 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1094 if (!capable(CAP_NET_ADMIN))
1098 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/* Sanity-check userspace parms: must describe a plain GRE-over-IPv4
 * header with no options and no unsupported GRE flag bits. */
1102 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1103 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1104 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1107 add_tunnel = (cmd == SIOCADDTUNNEL || cmd == SIOCADDGRETAP);
1108 gretap = (cmd == SIOCADDGRETAP || cmd == SIOCCHGGRETAP);
1110 if (!(p.i_flags&GRE_KEY))
1112 if (!(p.o_flags&GRE_KEY))
1115 t = ipgre_tunnel_locate(net, &p, gretap, add_tunnel);
1117 if (dev != ign->fb_tunnel_dev && !add_tunnel) {
/* CHG must not alias an existing different tunnel. */
1119 if (t->dev != dev) {
1124 unsigned nflags = 0;
1126 t = netdev_priv(dev);
1128 if (ipv4_is_multicast(p.iph.daddr))
1129 nflags = IFF_BROADCAST;
1130 else if (p.iph.daddr)
1131 nflags = IFF_POINTOPOINT;
/* Changing between p2p/broadcast addressing modes is not allowed. */
1133 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
/* Re-hash: addresses/keys feed the hash, so unlink before updating. */
1137 ipgre_tunnel_unlink(ign, t);
1138 t->parms.iph.saddr = p.iph.saddr;
1139 t->parms.iph.daddr = p.iph.daddr;
1140 t->parms.i_key = p.i_key;
1141 t->parms.o_key = p.o_key;
1142 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1143 memcpy(dev->broadcast, &p.iph.daddr, 4);
1144 ipgre_tunnel_link(ign, t);
1145 netdev_state_change(dev);
1152 t->parms.iph.ttl = p.iph.ttl;
1153 t->parms.iph.tos = p.iph.tos;
1154 t->parms.iph.frag_off = p.iph.frag_off;
1155 if (t->parms.link != p.link) {
1156 t->parms.link = p.link;
1157 dev->mtu = ipgre_tunnel_bind_dev(dev);
1158 netdev_state_change(dev);
1161 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1164 err = (add_tunnel ? -ENOBUFS : -ENOENT);
/* DEL path below. */
1169 if (!capable(CAP_NET_ADMIN))
1172 if (dev == ign->fb_tunnel_dev) {
1174 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1177 if ((t = ipgre_tunnel_locate(net, &p, false, 0)) == NULL)
/* The fallback tunnel itself may never be deleted. */
1180 if (t == netdev_priv(ign->fb_tunnel_dev))
1184 unregister_netdevice(dev);
1196 #ifndef HAVE_NETDEV_STATS
/* Pre-2.6.22 compat: stats live in the tunnel private area, not net_device. */
1197 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1199 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
/* ndo_change_mtu: bound the requested MTU by the maximum IP payload
 * (0xFFF8) minus the tunnel's header overhead (and link-layer header when
 * needed_headroom is unavailable).
 * NOTE(review): the lower-bound check and assignment lines are missing from
 * this extraction — verify against the full source. */
1203 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1205 struct ip_tunnel *tunnel = netdev_priv(dev);
1207 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1208 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1210 new_mtu > 0xFFF8 - tunnel->hlen)
1217 /* Nice toy. Unfortunately, useless in real life :-)
1218 It allows to construct virtual multiprotocol broadcast "LAN"
1219 over the Internet, provided multicast routing is tuned.
1222 I have no idea whether this bicycle was invented before me,
1223 so that I had to set ARPHRD_IPGRE to a random value.
1224 I have an impression, that Cisco could make something similar,
1225 but this feature is apparently missing in IOS<=11.2(8).
1227 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1228 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1230 ping -t 255 224.66.66.66
1232 If nobody answers, mbone does not work.
1234 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1235 ip addr add 10.66.66.<somewhat>/24 dev Universe
1236 ifconfig Universe up
1237 ifconfig Universe add fe80::<Your_real_addr>/10
1238 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1241 ftp fec0:6666:6666::193.233.7.65
1246 #ifdef HAVE_NETDEV_HEADER_OPS
/* header_ops->create: prebuild the outer IP + GRE header on the skb so the
 * xmit path can use it directly (used for the broadcast/NBMA "LAN" mode).
 * Two signatures, selected by kernel version. */
1247 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1248 unsigned short type,
1249 const void *daddr, const void *saddr, unsigned len)
1251 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1252 void *daddr, void *saddr, unsigned len)
1255 struct ip_tunnel *t = netdev_priv(dev);
1256 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1257 __be16 *p = (__be16*)(iph+1);
1259 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1260 p[0] = t->parms.o_flags;
1264 * Set the source hardware address.
1268 memcpy(&iph->saddr, saddr, 4);
1271 memcpy(&iph->daddr, daddr, 4);
/* A unicast destination makes the header complete. */
1274 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1280 #ifdef HAVE_NETDEV_HEADER_OPS
/* header_ops->parse: report the sender's "hardware address", which for a GRE
 * device is the outer IPv4 source address (4 bytes). */
1281 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1283 static int ipgre_header_parse(struct sk_buff *skb, unsigned char *haddr)
1286 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1287 memcpy(haddr, &iph->saddr, 4);
1291 #ifdef HAVE_NETDEV_HEADER_OPS
/* Link-layer header callbacks for 2.6.24+ kernels. */
1292 static const struct header_ops ipgre_header_ops = {
1293 .create = ipgre_header,
1294 .parse = ipgre_header_parse,
1298 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open for broadcast GRE: when the remote is a multicast group, route it,
 * join the group on the resolved output device and remember its ifindex in
 * t->mlink so ipgre_close() can leave the group later. */
1299 static int ipgre_open(struct net_device *dev)
1301 struct ip_tunnel *t = netdev_priv(dev);
1303 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1304 struct flowi fl = { .oif = t->parms.link,
1306 { .daddr = t->parms.iph.daddr,
1307 .saddr = t->parms.iph.saddr,
1308 .tos = RT_TOS(t->parms.iph.tos) } },
1309 .proto = IPPROTO_GRE };
1311 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1312 return -EADDRNOTAVAIL;
/* From here on, 'dev' is the underlying output device. */
1313 dev = rt->u.dst.dev;
1315 if (__in_dev_get_rtnl(dev) == NULL)
1316 return -EADDRNOTAVAIL;
1317 t->mlink = dev->ifindex;
1318 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ndo_stop counterpart of ipgre_open(): leave the multicast group on the
 * device recorded in t->mlink, if we joined one. */
1323 static int ipgre_close(struct net_device *dev)
1325 struct ip_tunnel *t = netdev_priv(dev);
1327 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1328 struct in_device *in_dev;
1329 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1331 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* ethtool -i handler: identify the driver, the OVS build version, and whether
 * the device is a gretap (Ethernet) or plain gre tunnel. */
1340 static void ethtool_getinfo(struct net_device *dev,
1341 struct ethtool_drvinfo *info)
1343 strcpy(info->driver, "ip_gre");
1344 strcpy(info->version, "Open vSwitch "VERSION BUILDNR);
1345 strcpy(info->bus_info, dev->type == ARPHRD_ETHER ? "gretap" : "gre");
/* Only get_drvinfo is implemented; all other ethtool ops stay NULL. */
1348 static struct ethtool_ops ethtool_ops = {
1349 .get_drvinfo = ethtool_getinfo,
1352 #ifdef HAVE_NET_DEVICE_OPS
/* net_device_ops for plain (ARPHRD_IPGRE) tunnels; open/stop are only wired
 * up when multicast/broadcast GRE support is compiled in. */
1353 static const struct net_device_ops ipgre_netdev_ops = {
1354 .ndo_init = ipgre_tunnel_init,
1355 .ndo_uninit = ipgre_tunnel_uninit,
1356 #ifdef CONFIG_NET_IPGRE_BROADCAST
1357 .ndo_open = ipgre_open,
1358 .ndo_stop = ipgre_close,
1360 .ndo_start_xmit = ipgre_tunnel_xmit,
1361 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1362 .ndo_change_mtu = ipgre_tunnel_change_mtu,
/* alloc_netdev() setup callback for plain GRE tunnels: install the ops
 * (via net_device_ops on new kernels, individual fields on old ones) and
 * the default type, headroom, MTU and flags.
 *
 * Fix: the ethtool registration argument had been corrupted by an HTML
 * entity ("&eth" rendered as a U+00F0 character); restore "&ethtool_ops".
 * NOTE(review): this extraction is missing some lines around the flag
 * assignments — verify against the full source. */
1366 static void ipgre_tunnel_setup(struct net_device *dev)
1368 #ifdef HAVE_NET_DEVICE_OPS
1369 dev->netdev_ops = &ipgre_netdev_ops;
1371 dev->init = ipgre_tunnel_init;
1372 dev->uninit = ipgre_tunnel_uninit;
1373 dev->hard_start_xmit = ipgre_tunnel_xmit;
1374 #ifndef HAVE_NETDEV_STATS
1375 dev->get_stats = ipgre_tunnel_get_stats;
1377 dev->do_ioctl = ipgre_tunnel_ioctl;
1378 dev->change_mtu = ipgre_tunnel_change_mtu;
1379 #endif /* HAVE_NET_DEVICE_OPS */
1380 dev->destructor = free_netdev;
1382 dev->type = ARPHRD_IPGRE;
/* Default overhead: outer IP header + 4-byte base GRE header. */
1383 #ifdef HAVE_NETDEV_NEEDED_HEADROOM
1384 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1386 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1388 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1389 dev->flags = IFF_NOARP;
1392 dev->features |= NETIF_F_NETNS_LOCAL;
1393 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1395 SET_ETHTOOL_OPS(dev, &ethtool_ops);
/* ndo_init for plain GRE tunnels: record the device name, seed dev_addr /
 * broadcast from the tunnel endpoints, and install header ops — broadcast
 * flavor for multicast remotes, plain otherwise.
 * NOTE(review): #else/#endif lines and the return are missing from this
 * extraction — verify against the full source. */
1398 static int ipgre_tunnel_init(struct net_device *dev)
1400 struct ip_tunnel *tunnel;
1403 tunnel = netdev_priv(dev);
1404 iph = &tunnel->parms.iph;
1407 strcpy(tunnel->parms.name, dev->name);
/* The 4-byte "hardware addresses" of a GRE device are its IPv4 endpoints. */
1409 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1410 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1413 #ifdef CONFIG_NET_IPGRE_BROADCAST
1414 if (ipv4_is_multicast(iph->daddr)) {
1417 dev->flags = IFF_BROADCAST;
1418 #ifdef HAVE_NETDEV_HEADER_OPS
1419 dev->header_ops = &ipgre_header_ops;
1421 dev->hard_header = ipgre_header;
1422 dev->hard_header_parse = ipgre_header_parse;
1424 #ifndef HAVE_NET_DEVICE_OPS
1425 dev->open = ipgre_open;
1426 dev->stop = ipgre_close;
1431 #ifdef HAVE_NETDEV_HEADER_OPS
1432 dev->header_ops = &ipgre_header_ops;
1434 dev->hard_header = ipgre_header;
1435 dev->hard_header_parse = ipgre_header_parse;
1442 #ifdef HAVE_NET_DEVICE_OPS
/* Initialize the per-namespace fallback ("fb") tunnel device: a keyless,
 * addressless GRE endpoint registered in the wildcard table so it catches
 * packets no configured tunnel matched.  Return type differs by kernel
 * version (void vs int init callback). */
1443 static void ipgre_fb_tunnel_init(struct net_device *dev)
1445 static int ipgre_fb_tunnel_init(struct net_device *dev)
1448 struct ip_tunnel *tunnel = netdev_priv(dev);
1449 struct iphdr *iph = &tunnel->parms.iph;
1450 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1453 strcpy(tunnel->parms.name, dev->name);
1456 iph->protocol = IPPROTO_GRE;
1458 tunnel->hlen = sizeof(struct iphdr) + 4;
/* Wildcard slot 0: matched last by ipgre_tunnel_lookup(). */
1461 ign->tunnels_wc[0] = tunnel;
1463 #ifndef HAVE_NET_DEVICE_OPS
/* Registration record hooking ipgre_rcv/ipgre_err into the IPv4 stack for
 * IPPROTO_GRE; const-ness of net_protocol changed in 2.6.32. */
1468 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
1469 static struct net_protocol ipgre_protocol = {
1471 static const struct net_protocol ipgre_protocol = {
1473 .handler = ipgre_rcv,
1474 .err_handler = ipgre_err,
1475 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
/* Namespace teardown: queue every tunnel device in all four hash tables for
 * batched unregistration via unregister_netdevice_queue().
 * NOTE(review): the inner walk along t->next is missing from this
 * extraction — verify against the full source. */
1480 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1484 for (prio = 0; prio < 4; prio++) {
1486 for (h = 0; h < HASH_SIZE; h++) {
1487 struct ip_tunnel *t = ign->tunnels[prio][h];
1490 unregister_netdevice_queue(t->dev, head);
/* Per-namespace init: allocate and register the fallback tunnel device.
 * On older kernels the init callback is assigned instead of called directly.
 * NOTE(review): error labels and the success return are missing from this
 * extraction — verify against the full source. */
1497 static int ipgre_init_net(struct net *net)
1499 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1502 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), GRE_IOCTL_DEVICE,
1503 ipgre_tunnel_setup);
1504 if (!ign->fb_tunnel_dev) {
1508 dev_net_set(ign->fb_tunnel_dev, net);
1510 #ifdef HAVE_NET_DEVICE_OPS
1511 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1513 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1515 #ifndef GRE_IOCTL_ONLY
1516 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1519 if ((err = register_netdev(ign->fb_tunnel_dev)))
/* Error path: release the allocated device. */
1525 free_netdev(ign->fb_tunnel_dev);
/* Per-namespace teardown: collect all tunnels onto a list and unregister
 * them in one batch (single RTNL round trip). */
1530 static void ipgre_exit_net(struct net *net)
1532 struct ipgre_net *ign;
1535 ign = net_generic(net, ipgre_net_id);
1537 ipgre_destroy_tunnels(ign, &list);
1538 unregister_netdevice_many(&list);
/* pernet hooks: one ipgre_net state block per network namespace. */
1542 static struct pernet_operations ipgre_net_ops = {
1543 .init = ipgre_init_net,
1544 .exit = ipgre_exit_net,
1545 .id = &ipgre_net_id,
1546 .size = sizeof(struct ipgre_net),
/* ndo_init for gretap devices: record the device name in the tunnel parms
 * and bind the tunnel to its underlying route/device. */
1549 static int ipgre_tap_init(struct net_device *dev)
1551 struct ip_tunnel *tunnel;
1553 tunnel = netdev_priv(dev);
1556 strcpy(tunnel->parms.name, dev->name);
1558 ipgre_tunnel_bind_dev(dev);
/* net_device_ops for gretap: Ethernet address handling (eth_mac_addr /
 * eth_validate_addr) on top of the shared GRE tunnel xmit/ioctl/MTU ops. */
1563 #ifdef HAVE_NET_DEVICE_OPS
1564 static const struct net_device_ops ipgre_tap_netdev_ops = {
1565 .ndo_init = ipgre_tap_init,
1566 .ndo_uninit = ipgre_tunnel_uninit,
1567 .ndo_start_xmit = ipgre_tunnel_xmit,
1568 .ndo_set_mac_address = eth_mac_addr,
1569 .ndo_validate_addr = eth_validate_addr,
1570 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1571 .ndo_change_mtu = ipgre_tunnel_change_mtu,
/* netdev setup for gretap (Ethernet-over-GRE) devices.  Installs either the
 * net_device_ops table (newer kernels) or the individual function pointers
 * (older kernels), marks the device namespace-local and queueless, and hooks
 * up the ethtool ops. */
1575 static void ipgre_tap_setup(struct net_device *dev)
1579 #ifdef HAVE_NET_DEVICE_OPS
1580 dev->netdev_ops = &ipgre_tap_netdev_ops;
1582 dev->init = ipgre_tap_init;
1583 dev->uninit = ipgre_tunnel_uninit;
1584 dev->hard_start_xmit = ipgre_tunnel_xmit;
1585 #ifndef HAVE_NETDEV_STATS
1586 dev->get_stats = ipgre_tunnel_get_stats;
1588 dev->do_ioctl = ipgre_tunnel_ioctl;
1589 dev->change_mtu = ipgre_tunnel_change_mtu;
1590 #endif /* HAVE_NET_DEVICE_OPS */
/* Tunnel devices are freed, not recycled, when unregistered. */
1591 dev->destructor = free_netdev;
1594 dev->features |= NETIF_F_NETNS_LOCAL;
1595 dev->tx_queue_len = 0;
/* BUG FIX: "&ethtool_ops" had been mangled into the mojibake "ðtool_ops"
 * (HTML entity "&eth" mis-decoded); restore the address-of expression. */
1597 SET_ETHTOOL_OPS(dev, &ethtool_ops);
1600 #ifndef GRE_IOCTL_ONLY
/* rtnetlink validate for plain GRE links: reject GRE flag combinations
 * this driver cannot handle (version != 0 or routing present). */
1601 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1609 if (data[IFLA_GRE_IFLAGS])
1610 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS])
1611 if (data[IFLA_GRE_OFLAGS])
1612 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1613 if (flags & (GRE_VERSION|GRE_ROUTING))
/* rtnetlink validate for gretap links: check any supplied MAC address is a
 * well-formed unicast Ethernet address, read the remote endpoint (elided
 * checks presumably reject a zero/invalid daddr — confirm against the full
 * file), then defer to the common GRE flag validation. */
1619 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1623 if (tb[IFLA_ADDRESS]) {
1624 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1626 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1627 return -EADDRNOTAVAIL;
1633 if (data[IFLA_GRE_REMOTE]) {
1634 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1640 return ipgre_tunnel_validate(tb, data);
/* Translate IFLA_GRE_* netlink attributes into ip_tunnel_parm.  Every
 * attribute is optional; parms is zeroed first so absent attributes leave
 * their fields at 0. */
1643 static void ipgre_netlink_parms(struct nlattr *data[],
1644 struct ip_tunnel_parm *parms)
1646 memset(parms, 0, sizeof(*parms));
1648 parms->iph.protocol = IPPROTO_GRE;
1653 if (data[IFLA_GRE_LINK])
1654 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1656 if (data[IFLA_GRE_IFLAGS])
1657 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1659 if (data[IFLA_GRE_OFLAGS])
1660 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1662 if (data[IFLA_GRE_IKEY])
1663 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1665 if (data[IFLA_GRE_OKEY])
1666 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1668 if (data[IFLA_GRE_LOCAL])
1669 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1671 if (data[IFLA_GRE_REMOTE])
1672 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1674 if (data[IFLA_GRE_TTL])
1675 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1677 if (data[IFLA_GRE_TOS])
1678 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
/* Path-MTU discovery defaults to ON: set DF unless explicitly disabled. */
1680 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1681 parms->iph.frag_off = htons(IP_DF);
/* rtnetlink newlink: create a GRE/gretap device from netlink attributes.
 * Signature gained src_net in 2.6.33.  Fails if an identical tunnel already
 * exists; gives Ethernet-type (tap) devices a random MAC when none was
 * supplied; binds the tunnel, registers the netdev and links it into the
 * hash.  NOTE(review): error-branch bodies are elided in this listing. */
1684 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)
1685 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1686 struct nlattr *data[])
1688 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1689 struct nlattr *data[])
1692 struct ip_tunnel *nt;
1693 struct net *net = dev_net(dev);
1694 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1698 nt = netdev_priv(dev);
1699 ipgre_netlink_parms(data, &nt->parms);
/* Refuse a duplicate of an existing tunnel with the same type/keys/addrs. */
1701 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1704 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1705 random_ether_addr(dev->dev_addr);
1707 mtu = ipgre_tunnel_bind_dev(dev);
1711 err = register_netdevice(dev);
1716 ipgre_tunnel_link(ign, nt);
/* rtnetlink changelink: update an existing tunnel's parameters.  The fb
 * device cannot be reconfigured.  Address/key changes require re-hashing
 * (unlink, mutate, relink); non-Ethernet devices additionally may not
 * flip between broadcast and point-to-point modes. */
1722 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1723 struct nlattr *data[])
1725 struct ip_tunnel *t, *nt;
1726 struct net *net = dev_net(dev);
1727 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1728 struct ip_tunnel_parm p;
1731 if (dev == ign->fb_tunnel_dev)
1734 nt = netdev_priv(dev);
1735 ipgre_netlink_parms(data, &p);
1737 t = ipgre_tunnel_locate(net, &p, false, 0);
1745 if (dev->type != ARPHRD_ETHER) {
1746 unsigned nflags = 0;
1748 if (ipv4_is_multicast(p.iph.daddr))
1749 nflags = IFF_BROADCAST;
1750 else if (p.iph.daddr)
1751 nflags = IFF_POINTOPOINT;
/* Reject a change that would alter the device's broadcast/p2p mode. */
1753 if ((dev->flags ^ nflags) &
1754 (IFF_POINTOPOINT | IFF_BROADCAST))
/* Endpoint/key changed: re-hash the tunnel under its new identity. */
1758 ipgre_tunnel_unlink(ign, t);
1759 t->parms.iph.saddr = p.iph.saddr;
1760 t->parms.iph.daddr = p.iph.daddr;
1761 t->parms.i_key = p.i_key;
1762 if (dev->type != ARPHRD_ETHER) {
1763 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1764 memcpy(dev->broadcast, &p.iph.daddr, 4);
1766 ipgre_tunnel_link(ign, t);
1767 netdev_state_change(dev);
/* These fields are not part of the hash key; update them in place. */
1770 t->parms.o_key = p.o_key;
1771 t->parms.iph.ttl = p.iph.ttl;
1772 t->parms.iph.tos = p.iph.tos;
1773 t->parms.iph.frag_off = p.iph.frag_off;
1775 if (t->parms.link != p.link) {
1776 t->parms.link = p.link;
1777 mtu = ipgre_tunnel_bind_dev(dev);
1780 netdev_state_change(dev);
/* Upper bound on netlink message size for a GRE link dump (one nla_total_size
 * term per IFLA_GRE_* attribute; the terms themselves are elided here). */
1786 static size_t ipgre_get_size(const struct net_device *dev)
1791 /* IFLA_GRE_IFLAGS */
1793 /* IFLA_GRE_OFLAGS */
1799 /* IFLA_GRE_LOCAL */
1801 /* IFLA_GRE_REMOTE */
1807 /* IFLA_GRE_PMTUDISC */
/* Dump the tunnel's parameters as IFLA_GRE_* attributes.  Each NLA_PUT_*
 * macro jumps to an (elided) nla_put_failure label when the skb runs out
 * of room. */
1812 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1814 struct ip_tunnel *t = netdev_priv(dev);
1815 struct ip_tunnel_parm *p = &t->parms;
1817 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1818 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1819 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1820 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1821 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1822 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1823 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1824 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1825 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
/* PMTUDISC is reported as a boolean derived from the DF bit. */
1826 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
/* Netlink attribute validation policy for IFLA_GRE_*.  LOCAL/REMOTE are
 * length-checked raw IPv4 addresses rather than typed integers. */
1834 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1835 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1836 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1837 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1838 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1839 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1840 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1841 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1842 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1843 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1844 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
/* rtnl_link_ops for plain "gre" links (the .kind string is elided here). */
1847 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1849 .maxtype = IFLA_GRE_MAX,
1850 .policy = ipgre_policy,
1851 .priv_size = sizeof(struct ip_tunnel),
1852 .setup = ipgre_tunnel_setup,
1853 .validate = ipgre_tunnel_validate,
1854 .newlink = ipgre_newlink,
1855 .changelink = ipgre_changelink,
1856 .get_size = ipgre_get_size,
1857 .fill_info = ipgre_fill_info,
/* rtnl_link_ops for "gretap" links: same netlink plumbing as plain GRE but
 * with the Ethernet-aware setup/validate hooks. */
1860 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1862 .maxtype = IFLA_GRE_MAX,
1863 .policy = ipgre_policy,
1864 .priv_size = sizeof(struct ip_tunnel),
1865 .setup = ipgre_tap_setup,
1866 .validate = ipgre_tap_validate,
1867 .newlink = ipgre_newlink,
1868 .changelink = ipgre_changelink,
1869 .get_size = ipgre_get_size,
1870 .fill_info = ipgre_fill_info,
1875 * And now the modules code and kernel interface.
/* Module init: register the IPPROTO_GRE handler, the pernet device ops and
 * (unless built ioctl-only) the two rtnl_link_ops.  Unwinds in reverse
 * order via the goto labels on any failure. */
1878 static int __init ipgre_init(void)
1882 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
/* Fails if another GRE handler (e.g. the in-tree ip_gre) is loaded. */
1884 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1885 printk(KERN_INFO "ipgre init: can't add protocol\n");
1889 err = register_pernet_device(&ipgre_net_ops);
1891 goto gen_device_failed;
1893 #ifndef GRE_IOCTL_ONLY
1894 err = rtnl_link_register(&ipgre_link_ops);
1896 goto rtnl_link_failed;
1898 err = rtnl_link_register(&ipgre_tap_ops);
1900 goto tap_ops_failed;
/* Error unwind: undo registrations in reverse order. */
1906 #ifndef GRE_IOCTL_ONLY
1908 rtnl_link_unregister(&ipgre_link_ops);
1910 unregister_pernet_device(&ipgre_net_ops);
1913 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
/* Module exit: mirror of ipgre_init — unregister in strict reverse order. */
1918 static void __exit ipgre_fini(void)
1920 #ifndef GRE_IOCTL_ONLY
1921 rtnl_link_unregister(&ipgre_tap_ops);
1922 rtnl_link_unregister(&ipgre_link_ops);
1924 unregister_pernet_device(&ipgre_net_ops);
1925 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1926 printk(KERN_INFO "ipgre close: can't remove protocol\n");
/* Module metadata: entry points, license, and rtnl "kind" aliases so
 * `ip link add type gre|gretap` can auto-load this module. */
1929 module_init(ipgre_init);
1930 module_exit(ipgre_fini);
1931 MODULE_DESCRIPTION("GRE over IPv4 tunneling driver");
1932 MODULE_LICENSE("GPL");
1933 #ifndef GRE_IOCTL_ONLY
1934 MODULE_ALIAS_RTNL_LINK("gre");
1935 MODULE_ALIAS_RTNL_LINK("gretap");