fedora core 6 1.2949 + vserver 2.2.0

[linux-2.6.git] / net / ipv6 / icmp.c
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c

index ff3ec98..3dcc4b7 100644 (file)
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -42,6 +42,7 @@
  #include <linux/net.h>
  #include <linux/skbuff.h>
  #include <linux/init.h>
+#include <linux/netfilter.h>
  
  #ifdef CONFIG_SYSCTL
  #include <linux/sysctl.h>
@@ -67,7 +68,7 @@
  #include <asm/uaccess.h>
  #include <asm/system.h>
  
-DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
  
  /*
   *     The ICMP socket(s). This is the most convenient way to flow control
@@ -79,7 +80,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
  static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
  #define icmpv6_socket  __get_cpu_var(__icmpv6_socket)
  
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+static int icmpv6_rcv(struct sk_buff **pskb);
  
  static struct inet6_protocol icmpv6_protocol = {
         .handler        =       icmpv6_rcv,
@@ -150,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb)
         return 0;
  }
  
-static int sysctl_icmpv6_time = 1*HZ; 
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
  
  /* 
   * Check the ICMP output rate limit 
@@ -176,7 +177,8 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
          */
         dst = ip6_route_output(sk, fl);
         if (dst->error) {
-               IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+               IP6_INC_STATS(ip6_dst_idev(dst),
+                             IPSTATS_MIB_OUTNOROUTES);
         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
                 res = 1;
         } else {
@@ -232,7 +234,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
                                                       len, fl->proto,
                                                       skb->csum);
         } else {
-               u32 tmp_csum = 0;
+               __wsum tmp_csum = 0;
  
                 skb_queue_walk(&sk->sk_write_queue, skb) {
                         tmp_csum = csum_add(tmp_csum, skb->csum);
@@ -240,13 +242,11 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
  
                 tmp_csum = csum_partial((char *)icmp6h,
                                         sizeof(struct icmp6hdr), tmp_csum);
-               tmp_csum = csum_ipv6_magic(&fl->fl6_src,
-                                          &fl->fl6_dst,
-                                          len, fl->proto, tmp_csum);
-               icmp6h->icmp6_cksum = tmp_csum;
+               icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
+                                                     &fl->fl6_dst,
+                                                     len, fl->proto,
+                                                     tmp_csum);
         }
-       if (icmp6h->icmp6_cksum == 0)
-               icmp6h->icmp6_cksum = -1;
         ip6_push_pending_frames(sk);
  out:
         return err;
@@ -255,20 +255,46 @@ out:
  struct icmpv6_msg {
         struct sk_buff  *skb;
         int             offset;
+       uint8_t         type;   /* ICMPv6 type being sent; icmpv6_getfrag() tests its INFOMSG bit */
  };
  
  static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
  {
         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
         struct sk_buff *org_skb = msg->skb;
-       __u32 csum = 0;
+       __wsum csum = 0;        /* checksum of the bytes copied by this call */
  
         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
                                       to, len, csum);
         skb->csum = csum_block_add(skb->csum, csum, odd);
+       if (!(msg->type & ICMPV6_INFOMSG_MASK)) /* INFOMSG bit clear: not an informational message */
+               nf_ct_attach(skb, org_skb);     /* NOTE(review): presumably ties skb to org_skb's conntrack entry -- confirm */
         return 0;
  }
  
+#ifdef CONFIG_IPV6_MIP6
+static void mip6_addr_swap(struct sk_buff *skb) /* swap skb's IPv6 source address with its HAO option address, in place */
+{
+       struct ipv6hdr *iph = skb->nh.ipv6h;
+       struct inet6_skb_parm *opt = IP6CB(skb);
+       struct ipv6_destopt_hao *hao;
+       struct in6_addr tmp;
+       int off;
+
+       if (opt->dsthao) {      /* only when the skb control block recorded a dsthao value */
+               off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+               if (likely(off >= 0)) { /* non-negative off is used as an offset from skb->nh.raw */
+                       hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+                       ipv6_addr_copy(&tmp, &iph->saddr);      /* three-way exchange through tmp */
+                       ipv6_addr_copy(&iph->saddr, &hao->addr);
+                       ipv6_addr_copy(&hao->addr, &tmp);
+               }
+       }
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {} /* CONFIG_IPV6_MIP6 not set: nothing to swap */
+#endif
+
  /*
   *     Send an ICMP message in response to a packet in error
   */
@@ -287,7 +313,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         int iif = 0;
         int addr_type = 0;
         int len;
-       int hlimit;
+       int hlimit, tclass;
         int err = 0;
  
         if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
@@ -328,12 +354,13 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
                 iif = skb->dev->ifindex;
  
         /*
-        *      Must not send if we know that source is Anycast also.
-        *      for now we don't know that.
+        *      Must not send error if the source does not uniquely
+        *      identify a single node (RFC2463 Section 2.4).
+        *      We check unspecified / multicast addresses here,
+        *      and anycast addresses will be checked later.
          */
         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
                 return;
         }
  
@@ -341,11 +368,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
          *      Never answer to a ICMP packet.
          */
         if (is_ineligible(skb)) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); 
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
                 return;
         }
  
+       mip6_addr_swap(skb);
+
         memset(&fl, 0, sizeof(fl));
         fl.proto = IPPROTO_ICMPV6;
         ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
@@ -354,6 +382,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         fl.oif = iif;
         fl.fl_icmp_type = type;
         fl.fl_icmp_code = code;
+       security_skb_classify_flow(skb, &fl);
  
         if (icmpv6_xmit_lock())
                 return;
@@ -375,8 +404,18 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         err = ip6_dst_lookup(sk, &dst, &fl);
         if (err)
                 goto out;
-       if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+
+       /*
+        * We won't send icmp if the destination is known
+        * anycast.
+        */
+       if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
                 goto out_dst_release;
+       }
+
+       if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+               goto out;
  
         if (ipv6_addr_is_multicast(&fl.fl6_dst))
                 hlimit = np->mcast_hops;
@@ -387,14 +426,18 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         if (hlimit < 0)
                 hlimit = ipv6_get_hoplimit(dst->dev);
  
+       tclass = np->tclass;
+       if (tclass < 0)
+               tclass = 0;
+
         msg.skb = skb;
         msg.offset = skb->nh.raw - skb->data;
+       msg.type = type;
  
         len = skb->len - msg.offset;
         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
         if (len < 0) {
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmp: len problem\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
                 goto out_dst_release;
         }
  
@@ -403,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         err = ip6_append_data(sk, icmpv6_getfrag, &msg,
                               len + sizeof(struct icmp6hdr),
                               sizeof(struct icmp6hdr),
-                             hlimit, NULL, &fl, (struct rt6_info*)dst,
+                             hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
                               MSG_DONTWAIT);
         if (err) {
                 ip6_flush_pending_frames(sk);
@@ -437,6 +480,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
         struct dst_entry *dst;
         int err = 0;
         int hlimit;
+       int tclass;
  
         saddr = &skb->nh.ipv6h->daddr;
  
@@ -453,6 +497,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
                 ipv6_addr_copy(&fl.fl6_src, saddr);
         fl.oif = skb->dev->ifindex;
         fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+       security_skb_classify_flow(skb, &fl);
  
         if (icmpv6_xmit_lock())
                 return;
@@ -467,7 +512,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
         if (err)
                 goto out;
         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-               goto out_dst_release;
+               goto out;
  
         if (ipv6_addr_is_multicast(&fl.fl6_dst))
                 hlimit = np->mcast_hops;
@@ -478,13 +523,18 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
         if (hlimit < 0)
                 hlimit = ipv6_get_hoplimit(dst->dev);
  
+       tclass = np->tclass;
+       if (tclass < 0)
+               tclass = 0;
+
         idev = in6_dev_get(skb->dev);
  
         msg.skb = skb;
         msg.offset = 0;
+       msg.type = ICMPV6_ECHO_REPLY;
  
         err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
-                               sizeof(struct icmp6hdr), hlimit, NULL, &fl,
+                               sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
                                 (struct rt6_info*)dst, MSG_DONTWAIT);
  
         if (err) {
@@ -499,13 +549,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
  out_put: 
         if (likely(idev != NULL))
                 in6_dev_put(idev);
-out_dst_release:
         dst_release(dst);
  out: 
         icmpv6_xmit_unlock();
  }
  
-static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
+static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
  {
         struct in6_addr *saddr, *daddr;
         struct inet6_protocol *ipprot;
@@ -551,7 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
  
         read_lock(&raw_v6_lock);
         if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
-               while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
+               while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
+                                           IP6CB(skb)->iif))) {
                         rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
                         sk = sk_next(sk);
                 }
@@ -563,7 +613,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
   *     Handle icmp messages
   */
  
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int icmpv6_rcv(struct sk_buff **pskb)
  {
         struct sk_buff *skb = *pskb;
         struct net_device *dev = skb->dev;
@@ -579,21 +629,18 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
         daddr = &skb->nh.ipv6h->daddr;
  
         /* Perform checksum. */
-       if (skb->ip_summed == CHECKSUM_HW) {
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-               if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
-                                   skb->csum)) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
-                       skb->ip_summed = CHECKSUM_NONE;
-               }
-       }
-       if (skb->ip_summed == CHECKSUM_NONE) {
-               if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
-                                   skb_checksum(skb, 0, skb->len, 0))) {
-                       LIMIT_NETDEBUG(
-                               printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
-                                      NIP6(*saddr), NIP6(*daddr)));
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+                                    skb->csum))
+                       break;
+               /* fall through */
+       case CHECKSUM_NONE:
+               skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
+                                            IPPROTO_ICMPV6, 0));
+               if (__skb_checksum_complete(skb)) {
+                       LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
+                                      NIP6(*saddr), NIP6(*daddr));
                         goto discard_it;
                 }
         }
@@ -669,8 +716,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
                 break;
  
         default:
-               LIMIT_NETDEBUG(
-                       printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
+               LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
  
                 /* informational */
                 if (type & ICMPV6_INFOMSG_MASK)
@@ -692,15 +738,17 @@ discard_it:
         return 0;
  }
  
+/*
+ * Special lock-class for __icmpv6_socket:
+ */
+static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
+
  int __init icmpv6_init(struct net_proto_family *ops)
  {
         struct sock *sk;
         int err, i, j;
  
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
-
+       for_each_possible_cpu(i) {
                 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
                                        &per_cpu(__icmpv6_socket, i));
                 if (err < 0) {
@@ -713,6 +761,14 @@ int __init icmpv6_init(struct net_proto_family *ops)
  
                 sk = per_cpu(__icmpv6_socket, i)->sk;
                 sk->sk_allocation = GFP_ATOMIC;
+               /*
+                * Split off their lock-class, because sk->sk_dst_lock
+                * gets used from softirqs, which is safe for
+                * __icmpv6_socket (because those never get directly used
+                * via userspace syscalls), but unsafe for normal sockets.
+                */
+               lockdep_set_class(&sk->sk_dst_lock,
+                                 &icmpv6_socket_sk_dst_lock_key);
  
                 /* Enough space for 2 64K ICMP packets, including
                  * sk_buff struct overhead.
@@ -746,15 +802,13 @@ void icmpv6_cleanup(void)
  {
         int i;
  
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
+       for_each_possible_cpu(i) {
                 sock_release(per_cpu(__icmpv6_socket, i));
         }
         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
  }
  
-static struct icmp6_err {
+static const struct icmp6_err {
         int err;
         int fatal;
  } tab_unreach[] = {