Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / net / ipv4 / icmp.c
index 6926132..2a04559 100644 (file)
@@ -73,6 +73,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
@@ -114,7 +115,7 @@ struct icmp_bxm {
 /*
  *     Statistics
  */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
 
 /* An array of errno for error messages from dest unreach. */
 /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -188,10 +189,10 @@ struct icmp_err icmp_err_convert[] = {
 
 /* Control parameters for ECHO replies. */
 int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts;
+int sysctl_icmp_echo_ignore_broadcasts = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses;
+int sysctl_icmp_ignore_bogus_error_responses = 1;
 
 /*
  *     Configurable global rate limit.
@@ -207,6 +208,7 @@ int sysctl_icmp_ignore_bogus_error_responses;
 
 int sysctl_icmp_ratelimit = 1 * HZ;
 int sysctl_icmp_ratemask = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr;
 
 /*
  *     ICMP control array. This specifies what to do with each ICMP.
@@ -219,7 +221,7 @@ struct icmp_control {
        short   error;          /* This ICMP is classed as an error message */
 };
 
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
 /*
  *     The ICMP socket(s). This is the most convenient way to flow control
@@ -327,8 +329,8 @@ static void icmp_out_count(int type)
  *     Checksum each fragment, and on the first include the headers and final
  *     checksum.
  */
-int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
-                  struct sk_buff *skb)
+static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
+                         struct sk_buff *skb)
 {
        struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
        unsigned int csum;
@@ -338,6 +340,8 @@ int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
                                      to, len, 0);
 
        skb->csum = csum_block_add(skb->csum, csum, odd);
+       if (icmp_pointers[icmp_param->data.icmph.type].error)
+               nf_ct_attach(skb, icmp_param->skb);
        return 0;
 }
 
@@ -346,12 +350,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 {
        struct sk_buff *skb;
 
-       ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
-                      icmp_param->data_len+icmp_param->head_len,
-                      icmp_param->head_len,
-                      ipc, rt, MSG_DONTWAIT);
-
-       if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+       if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+                          icmp_param->data_len+icmp_param->head_len,
+                          icmp_param->head_len,
+                          ipc, rt, MSG_DONTWAIT) < 0)
+               ip_flush_pending_frames(icmp_socket->sk);
+       else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
                struct icmphdr *icmph = skb->h.icmph;
                unsigned int csum = 0;
                struct sk_buff *skb1;
@@ -375,13 +379,13 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
        struct sock *sk = icmp_socket->sk;
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        struct ipcm_cookie ipc;
        struct rtable *rt = (struct rtable *)skb->dst;
        u32 daddr;
 
        if (ip_options_echo(&icmp_param->replyopts, skb))
-               goto out;
+               return;
 
        if (icmp_xmit_lock())
                return;
@@ -412,7 +416,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        ip_rt_put(rt);
 out_unlock:
        icmp_xmit_unlock();
-out:;
 }
 
 
@@ -478,20 +481,25 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
                 *      ICMP error
                 */
                if (iph->protocol == IPPROTO_ICMP) {
-                       u8 inner_type;
-
-                       if (skb_copy_bits(skb_in,
-                                         skb_in->nh.raw + (iph->ihl << 2) +
-                                         offsetof(struct icmphdr, type) -
-                                         skb_in->data, &inner_type, 1))
+                       u8 _inner_type, *itp;
+
+                       itp = skb_header_pointer(skb_in,
+                                                skb_in->nh.raw +
+                                                (iph->ihl << 2) +
+                                                offsetof(struct icmphdr,
+                                                         type) -
+                                                skb_in->data,
+                                                sizeof(_inner_type),
+                                                &_inner_type);
+                       if (itp == NULL)
                                goto out;
 
                        /*
                         *      Assume any unknown ICMP type is an error. This
                         *      isn't specified by the RFC, but think about it..
                         */
-                       if (inner_type > NR_ICMP_TYPES ||
-                           icmp_pointers[inner_type].error)
+                       if (*itp > NR_ICMP_TYPES ||
+                           icmp_pointers[*itp].error)
                                goto out;
                }
        }
@@ -503,34 +511,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
         *      Construct source address and options.
         */
 
-#ifdef CONFIG_IP_ROUTE_NAT
-       /*
-        *      Restore original addresses if packet has been translated.
-        */
-       if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) {
-               iph->daddr = rt->fl.fl4_dst;
-               iph->saddr = rt->fl.fl4_src;
-       }
-#endif
-
        saddr = iph->daddr;
-       if (!(rt->rt_flags & RTCF_LOCAL))
-               saddr = 0;
+       if (!(rt->rt_flags & RTCF_LOCAL)) {
+               if (sysctl_icmp_errors_use_inbound_ifaddr)
+                       saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
+               else
+                       saddr = 0;
+       }
 
        tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
                                           IPTOS_PREC_INTERNETCONTROL) :
                                          iph->tos;
 
-       {
-               struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
-                                                        .saddr = saddr,
-                                                        .tos = RT_TOS(tos) } },
-                                   .proto = IPPROTO_ICMP };
-               if (ip_route_output_key(&rt, &fl))
-                   goto out_unlock;
-       }
        if (ip_options_echo(&icmp_param.replyopts, skb_in))
-               goto ende;
+               goto out_unlock;
 
 
        /*
@@ -547,13 +541,26 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
        inet_sk(icmp_socket->sk)->tos = tos;
        ipc.addr = iph->saddr;
        ipc.opt = &icmp_param.replyopts;
-       if (icmp_param.replyopts.srr) {
-               struct flowi fl = { .nl_u = { .ip4_u =
-                                             { .daddr = icmp_param.replyopts.faddr,
-                                               .saddr = saddr,
-                                               .tos = RT_TOS(tos) } },
-                                   .proto = IPPROTO_ICMP };
-               ip_rt_put(rt);
+
+       {
+               struct flowi fl = {
+                       .nl_u = {
+                               .ip4_u = {
+                                       .daddr = icmp_param.replyopts.srr ?
+                                               icmp_param.replyopts.faddr :
+                                               iph->saddr,
+                                       .saddr = saddr,
+                                       .tos = RT_TOS(tos)
+                               }
+                       },
+                       .proto = IPPROTO_ICMP,
+                       .uli_u = {
+                               .icmpt = {
+                                       .type = type,
+                                       .code = code
+                               }
+                       }
+               };
                if (ip_route_output_key(&rt, &fl))
                        goto out_unlock;
        }
@@ -563,7 +570,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 
        /* RFC says return as much as we can without exceeding 576 bytes. */
 
-       room = dst_pmtu(&rt->u.dst);
+       room = dst_mtu(&rt->u.dst);
        if (room > 576)
                room = 576;
        room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -620,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb)
                        break;
                case ICMP_FRAG_NEEDED:
                        if (ipv4_config.no_pmtu_disc) {
-                               LIMIT_NETDEBUG(
-                                       printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
+                               LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
                                                         "fragmentation needed "
                                                         "and DF set.\n",
-                                              NIPQUAD(iph->daddr)));
+                                              NIPQUAD(iph->daddr));
                        } else {
                                info = ip_rt_frag_needed(iph,
                                                     ntohs(icmph->un.frag.mtu));
@@ -633,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb)
                        }
                        break;
                case ICMP_SR_FAILED:
-                       LIMIT_NETDEBUG(
-                               printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
+                       LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
                                                 "Route Failed.\n",
-                                      NIPQUAD(iph->daddr)));
+                                      NIPQUAD(iph->daddr));
                        break;
                default:
                        break;
@@ -705,8 +710,7 @@ static void icmp_unreach(struct sk_buff *skb)
        read_unlock(&raw_v4_lock);
 
        rcu_read_lock();
-       ipprot = inet_protos[hash];
-       smp_read_barrier_depends();
+       ipprot = rcu_dereference(inet_protos[hash]);
        if (ipprot && ipprot->err_handler)
                ipprot->err_handler(skb, info);
        rcu_read_unlock();
@@ -749,7 +753,7 @@ static void icmp_redirect(struct sk_buff *skb)
        case ICMP_REDIR_HOST:
        case ICMP_REDIR_HOSTTOS:
                ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway,
-                              iph->saddr, iph->tos, skb->dev);
+                              iph->saddr, skb->dev);
                break;
        }
 out:
@@ -880,7 +884,6 @@ static void icmp_address_reply(struct sk_buff *skb)
        struct net_device *dev = skb->dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
-       u32 mask;
 
        if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
                goto out;
@@ -888,24 +891,26 @@ static void icmp_address_reply(struct sk_buff *skb)
        in_dev = in_dev_get(dev);
        if (!in_dev)
                goto out;
-       read_lock(&in_dev->lock);
+       rcu_read_lock();
        if (in_dev->ifa_list &&
            IN_DEV_LOG_MARTIANS(in_dev) &&
            IN_DEV_FORWARD(in_dev)) {
-               if (skb_copy_bits(skb, 0, &mask, 4))
-                       BUG();
+               u32 _mask, *mp;
+
+               mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
+               BUG_ON(mp == NULL);
                for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-                       if (mask == ifa->ifa_mask &&
+                       if (*mp == ifa->ifa_mask &&
                            inet_ifa_match(rt->rt_src, ifa))
                                break;
                }
                if (!ifa && net_ratelimit()) {
                        printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from "
                                         "%s/%u.%u.%u.%u\n",
-                              NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src));
+                              NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
                }
        }
-       read_unlock(&in_dev->lock);
+       rcu_read_unlock();
        in_dev_put(in_dev);
 out:;
 }
@@ -928,12 +933,11 @@ int icmp_rcv(struct sk_buff *skb)
        case CHECKSUM_HW:
                if (!(u16)csum_fold(skb->csum))
                        break;
-               NETDEBUG(if (net_ratelimit())
-                               printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
+               /* fall through */
        case CHECKSUM_NONE:
-               if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+               skb->csum = 0;
+               if (__skb_checksum_complete(skb))
                        goto error;
-       default:;
        }
 
        if (!pskb_pull(skb, sizeof(struct icmphdr)))
@@ -962,7 +966,8 @@ int icmp_rcv(struct sk_buff *skb)
                 *      RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
                 *        discarded if to broadcast/multicast.
                 */
-               if (icmph->type == ICMP_ECHO &&
+               if ((icmph->type == ICMP_ECHO ||
+                    icmph->type == ICMP_TIMESTAMP) &&
                    sysctl_icmp_echo_ignore_broadcasts) {
                        goto error;
                }
@@ -988,7 +993,7 @@ error:
 /*
  *     This table is the definition of how we handle ICMP.
  */
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
        [ICMP_ECHOREPLY] = {
                .output_entry = ICMP_MIB_OUTECHOREPS,
                .input_entry = ICMP_MIB_INECHOREPS,
@@ -1099,15 +1104,12 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 
 void __init icmp_init(struct net_proto_family *ops)
 {
-       struct inet_opt *inet;
+       struct inet_sock *inet;
        int i;
 
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_possible_cpu(i) {
                int err;
 
-               if (!cpu_possible(i))
-                       continue;
-
                err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
                                       &per_cpu(__icmp_socket, i));