*/
#include <linux/capability.h>
+#include <linux/ethtool.h>
#include <linux/module.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <linux/kernel.h>
fatal route to network, even if it were you who configured
fatal static route: you are innocent. :-)
-
+ XXX: Forcing the DF flag on was done only when setting up tunnels via the
+ ioctl interface and not via Netlink. Since it prevents some operations
+ and isn't very transparent, I removed it. It seems nobody really
+ cared about it anyway.
+ Moral: don't create loops.
3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
practically identical code. It would be good to glue them
#define HASH_SIZE 16
-static int ipgre_net_id;
+static int ipgre_net_id __read_mostly;
struct ipgre_net {
struct ip_tunnel *tunnels[4][HASH_SIZE];
#define tunnels_r tunnels[2]
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ipgre_lock);
-static DEFINE_RWLOCK(ipgre_lock);
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
/* Given src, dst and key, find appropriate for input tunnel. */
ARPHRD_ETHER : ARPHRD_IPGRE;
int score, cand_score = 4;
- for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
if (local != t->parms.iph.saddr ||
remote != t->parms.iph.daddr ||
key != t->parms.i_key ||
}
}
- for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
if (remote != t->parms.iph.daddr ||
key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
}
}
- for (t = ign->tunnels_l[h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
if ((local != t->parms.iph.saddr &&
(local != t->parms.iph.daddr ||
!ipv4_is_multicast(local))) ||
}
}
- for (t = ign->tunnels_wc[h1]; t; t = t->next) {
+ for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
if (t->parms.i_key != key ||
!(t->dev->flags & IFF_UP))
continue;
if (cand != NULL)
return cand;
- if (ign->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(ign->fb_tunnel_dev);
+ dev = ign->fb_tunnel_dev;
+ if (dev->flags & IFF_UP)
+ return netdev_priv(dev);
return NULL;
}
{
struct ip_tunnel **tp = ipgre_bucket(ign, t);
+ spin_lock_bh(&ipgre_lock);
t->next = *tp;
- write_lock_bh(&ipgre_lock);
- *tp = t;
- write_unlock_bh(&ipgre_lock);
+ rcu_assign_pointer(*tp, t);
+ spin_unlock_bh(&ipgre_lock);
}
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
- write_lock_bh(&ipgre_lock);
+ spin_lock_bh(&ipgre_lock);
*tp = t->next;
- write_unlock_bh(&ipgre_lock);
+ spin_unlock_bh(&ipgre_lock);
break;
}
}
dev_put(dev);
}
+/*
+ * Link-layer header length to budget for on @dev.
+ *
+ * With needed_headroom support, the device's hard_header_len holds the
+ * real link-header size, so it can be used directly.  Without it, this
+ * driver repurposes hard_header_len for tunnel headroom (it is set to
+ * hlen + addend elsewhere), so the value is derived from the device
+ * type instead: Ethernet GRE taps carry an Ethernet header, plain GRE
+ * devices carry no link header at all.
+ */
+static unsigned int tunnel_hard_header_len(struct net_device *dev)
+{
+#ifdef HAVE_NETDEV_NEEDED_HEADROOM
+ return dev->hard_header_len;
+#else
+ return (dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0;
+#endif
+}
static void ipgre_err(struct sk_buff *skb, u32 info)
{
struct ip_tunnel *t;
__be16 flags;
+ if (skb_headlen(skb) < grehlen)
+ return;
+
flags = p[0];
if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
if (flags&(GRE_VERSION|GRE_ROUTING))
break;
}
- read_lock(&ipgre_lock);
+ rcu_read_lock();
t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
flags & GRE_KEY ?
*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
t->err_count = 1;
t->err_time = jiffies;
out:
- read_unlock(&ipgre_lock);
+ rcu_read_unlock();
return;
}
{
if (INET_ECN_is_ce(iph->tos)) {
if (skb->protocol == htons(ETH_P_IP)) {
+ if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ + sizeof(struct iphdr) - skb->data)))
+ return;
+
IP_ECN_set_ce(ip_hdr(skb));
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ + sizeof(struct ipv6hdr) - skb->data)))
+ return;
+
IP6_ECN_set_ce(ipv6_hdr(skb));
}
}
gre_proto = *(__be16 *)(h + 2);
- read_lock(&ipgre_lock);
+ rcu_read_lock();
if ((tunnel = ipgre_tunnel_lookup(skb->dev,
iph->saddr, iph->daddr, key,
gre_proto))) {
nf_reset(skb);
skb_reset_network_header(skb);
- ipgre_ecn_decapsulate(iph, skb);
-#ifdef CHECKSUM_HW
- /* XXX: Temporary workaround to avoid a panic when doing
- * bridging due to multiple meanings of CHECKSUM_HW. */
- if (skb->ip_summed == CHECKSUM_HW)
- skb->ip_summed = CHECKSUM_NONE;
-#endif
+ /* Invalidates pointers. */
+ ipgre_ecn_decapsulate(iph, skb);
netif_rx(skb);
- read_unlock(&ipgre_lock);
+ rcu_read_unlock();
return(0);
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
- read_unlock(&ipgre_lock);
+ rcu_read_unlock();
drop_nolock:
kfree_skb(skb);
return(0);
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats;
+#ifdef HAVE_NETDEV_QUEUE_STATS
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+#endif
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
int gre_hlen;
__be32 dst;
int mtu;
+ u8 original_protocol;
#ifdef HAVE_NETDEV_STATS
- stats = &tunnel->dev->stats;
+ stats = &dev->stats;
#else
stats = &tunnel->stat;
#endif
+ /* Validate the protocol headers before we try to use them. */
+ original_protocol = skb->protocol;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ + sizeof(struct iphdr) - skb->data)))
+ skb->protocol = 0;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
+ + sizeof(struct ipv6hdr) - skb->data)))
+ skb->protocol = 0;
+ }
+
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
tos = 0;
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ tos = ipv6_get_dsfield(ipv6_hdr(skb));
}
{
df = tiph->frag_off;
if (df)
-#ifdef HAVE_NETDEV_NEEDED_HEADROOM
- mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
-#else
- mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
-#endif
+ mtu = dst_mtu(&rt->u.dst) - tunnel_hard_header_len(dev)
+ - tunnel->hlen;
else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb) {
ip_rt_put(rt);
+#ifdef HAVE_NETDEV_QUEUE_STATS
+ txq->tx_dropped++;
+#else
stats->tx_dropped++;
+#endif
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
}
+ skb->protocol = original_protocol;
+
((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
htons(ETH_P_TEB) : skb->protocol;
}
#ifdef HAVE_NETDEV_NEEDED_HEADROOM
dev->needed_headroom = hlen + addend;
- mtu -= dev->hard_header_len + addend;
#else
dev->hard_header_len = hlen + addend;
- mtu -= addend;
#endif
+ mtu -= tunnel_hard_header_len(dev) + addend;
tunnel->hlen = addend;
if (mtu < 68)
add_tunnel = (cmd == SIOCADDTUNNEL || cmd == SIOCADDGRETAP);
gretap = (cmd == SIOCADDGRETAP || cmd == SIOCCHGGRETAP);
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
if (!(p.i_flags&GRE_KEY))
p.i_key = 0;
if (!(p.o_flags&GRE_KEY))
{
struct ip_tunnel *tunnel = netdev_priv(dev);
if (new_mtu < 68 ||
-#ifdef HAVE_NETDEV_NEEDED_HEADROOM
- new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
-#else
- new_mtu > 0xFFF8 - tunnel->hlen)
-#endif
+ new_mtu > 0xFFF8 - tunnel_hard_header_len(dev) - tunnel->hlen)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
#endif
+/*
+ * Report driver identity for ethtool -i.  The bus_info field is
+ * repurposed to distinguish Ethernet GRE taps ("gretap") from plain
+ * GRE devices ("gre").
+ *
+ * The ethtool_drvinfo fields are small fixed-size char arrays; use
+ * bounded strlcpy() instead of strcpy() so that an oversized version
+ * string (it concatenates the build-time VERSION and BUILDNR macros)
+ * cannot overflow them.  strlcpy() always NUL-terminates.
+ */
+static void ethtool_getinfo(struct net_device *dev,
+			    struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, "ip_gre", sizeof(info->driver));
+	strlcpy(info->version, "Open vSwitch "VERSION BUILDNR,
+		sizeof(info->version));
+	strlcpy(info->bus_info, dev->type == ARPHRD_ETHER ? "gretap" : "gre",
+		sizeof(info->bus_info));
+}
+
+static struct ethtool_ops ethtool_ops = {
+	.get_drvinfo	= ethtool_getinfo,
+};
+
#ifdef HAVE_NET_DEVICE_OPS
static const struct net_device_ops ipgre_netdev_ops = {
.ndo_init = ipgre_tunnel_init,
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+ SET_ETHTOOL_OPS(dev, ðtool_ops);
}
static int ipgre_tunnel_init(struct net_device *dev)
#endif
};
-static void ipgre_destroy_tunnels(struct ipgre_net *ign)
+/*
+ * Queue every tunnel device from all four hash tables for
+ * unregistration on @head.  Devices are only queued here; the caller
+ * tears them down afterwards in a single batch.  Because
+ * unregister_netdevice_queue() defers the actual teardown, following
+ * t->next after queueing a device is still safe.
+ */
+static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
int prio;
for (prio = 0; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
-			struct ip_tunnel *t;
-			while ((t = ign->tunnels[prio][h]) != NULL)
-				unregister_netdevice(t->dev);
+			struct ip_tunnel *t = ign->tunnels[prio][h];
+
+			while (t != NULL) {
+				unregister_netdevice_queue(t->dev, head);
+				t = t->next;
+			}
}
}
}
static int ipgre_init_net(struct net *net)
{
+ struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int err;
- struct ipgre_net *ign;
-
- err = -ENOMEM;
- ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
- if (ign == NULL)
- goto err_alloc;
-
- err = net_assign_generic(net, ipgre_net_id, ign);
- if (err < 0)
- goto err_assign;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), GRE_IOCTL_DEVICE,
ipgre_tunnel_setup);
err_reg_dev:
free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
- /* nothing */
-err_assign:
- kfree(ign);
-err_alloc:
return err;
}
static void ipgre_exit_net(struct net *net)
{
struct ipgre_net *ign;
+	/* Collects devices queued by ipgre_destroy_tunnels(). */
+	LIST_HEAD(list);
ign = net_generic(net, ipgre_net_id);
rtnl_lock();
- ipgre_destroy_tunnels(ign);
+	ipgre_destroy_tunnels(ign, &list);
+	/* Tear down all queued tunnel devices in one batch. */
+	unregister_netdevice_many(&list);
rtnl_unlock();
- kfree(ign);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
.exit = ipgre_exit_net,
+	/*
+	 * With .id/.size set, the pernet core allocates and frees the
+	 * per-namespace struct ipgre_net itself, so init/exit no longer
+	 * need to kzalloc()/kfree() it by hand.
+	 */
+	.id = &ipgre_net_id,
+	.size = sizeof(struct ipgre_net),
};
static int ipgre_tap_init(struct net_device *dev)
dev->iflink = 0;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->tx_queue_len = 0;
+
+ SET_ETHTOOL_OPS(dev, ðtool_ops);
}
#ifndef GRE_IOCTL_ONLY
parms->iph.frag_off = htons(IP_DF);
}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)
+static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+#else
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
+#endif
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
return -EAGAIN;
}
- err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
+ err = register_pernet_device(&ipgre_net_ops);
if (err < 0)
goto gen_device_failed;
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
+ unregister_pernet_device(&ipgre_net_ops);
#endif
gen_device_failed:
inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
#endif
- unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
+ unregister_pernet_device(&ipgre_net_ops);
if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
printk(KERN_INFO "ipgre close: can't remove protocol\n");
}