fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / icmp.c
index 0625892..40cf0d0 100644 (file)
@@ -64,7 +64,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/jiffies.h>
@@ -73,6 +72,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
@@ -104,7 +104,7 @@ struct icmp_bxm {
 
        struct {
                struct icmphdr icmph;
-               __u32          times[3];
+               __be32         times[3];
        } data;
        int head_len;
        struct ip_options replyopts;
@@ -114,7 +114,7 @@ struct icmp_bxm {
 /*
  *     Statistics
  */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
 
 /* An array of errno for error messages from dest unreach. */
 /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
 };
 
 /* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
 
 /*
  *     Configurable global rate limit.
@@ -205,8 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses;
  *     time exceeded (11), parameter problem (12)
  */
 
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
 
 /*
  *     ICMP control array. This specifies what to do with each ICMP.
@@ -219,7 +220,7 @@ struct icmp_control {
        short   error;          /* This ICMP is classed as an error message */
 };
 
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
 /*
  *     The ICMP socket(s). This is the most convenient way to flow control
@@ -327,17 +328,19 @@ static void icmp_out_count(int type)
  *     Checksum each fragment, and on the first include the headers and final
  *     checksum.
  */
-int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
-                  struct sk_buff *skb)
+static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
+                         struct sk_buff *skb)
 {
        struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
-       unsigned int csum;
+       __wsum csum;
 
        csum = skb_copy_and_csum_bits(icmp_param->skb,
                                      icmp_param->offset + offset,
                                      to, len, 0);
 
        skb->csum = csum_block_add(skb->csum, csum, odd);
+       if (icmp_pointers[icmp_param->data.icmph.type].error)
+               nf_ct_attach(skb, icmp_param->skb);
        return 0;
 }
 
@@ -346,14 +349,14 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 {
        struct sk_buff *skb;
 
-       ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
-                      icmp_param->data_len+icmp_param->head_len,
-                      icmp_param->head_len,
-                      ipc, rt, MSG_DONTWAIT);
-
-       if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+       if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+                          icmp_param->data_len+icmp_param->head_len,
+                          icmp_param->head_len,
+                          ipc, rt, MSG_DONTWAIT) < 0)
+               ip_flush_pending_frames(icmp_socket->sk);
+       else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
                struct icmphdr *icmph = skb->h.icmph;
-               unsigned int csum = 0;
+               __wsum csum = 0;
                struct sk_buff *skb1;
 
                skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
@@ -375,13 +378,13 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
        struct sock *sk = icmp_socket->sk;
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        struct ipcm_cookie ipc;
        struct rtable *rt = (struct rtable *)skb->dst;
-       u32 daddr;
+       __be32 daddr;
 
        if (ip_options_echo(&icmp_param->replyopts, skb))
-               goto out;
+               return;
 
        if (icmp_xmit_lock())
                return;
@@ -403,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
                                                .saddr = rt->rt_spec_dst,
                                                .tos = RT_TOS(skb->nh.iph->tos) } },
                                    .proto = IPPROTO_ICMP };
+               security_skb_classify_flow(skb, &fl);
                if (ip_route_output_key(&rt, &fl))
                        goto out_unlock;
        }
@@ -412,7 +416,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        ip_rt_put(rt);
 out_unlock:
        icmp_xmit_unlock();
-out:;
 }
 
 
@@ -427,14 +430,14 @@ out:;
  *                     MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
+void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 {
        struct iphdr *iph;
        int room;
        struct icmp_bxm icmp_param;
        struct rtable *rt = (struct rtable *)skb_in->dst;
        struct ipcm_cookie ipc;
-       u32 saddr;
+       __be32 saddr;
        u8  tos;
 
        if (!rt)
@@ -509,23 +512,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
         */
 
        saddr = iph->daddr;
-       if (!(rt->rt_flags & RTCF_LOCAL))
-               saddr = 0;
+       if (!(rt->rt_flags & RTCF_LOCAL)) {
+               if (sysctl_icmp_errors_use_inbound_ifaddr)
+                       saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
+               else
+                       saddr = 0;
+       }
 
        tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
                                           IPTOS_PREC_INTERNETCONTROL) :
                                          iph->tos;
 
-       {
-               struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
-                                                        .saddr = saddr,
-                                                        .tos = RT_TOS(tos) } },
-                                   .proto = IPPROTO_ICMP };
-               if (ip_route_output_key(&rt, &fl))
-                   goto out_unlock;
-       }
        if (ip_options_echo(&icmp_param.replyopts, skb_in))
-               goto ende;
+               goto out_unlock;
 
 
        /*
@@ -542,13 +541,27 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
        inet_sk(icmp_socket->sk)->tos = tos;
        ipc.addr = iph->saddr;
        ipc.opt = &icmp_param.replyopts;
-       if (icmp_param.replyopts.srr) {
-               struct flowi fl = { .nl_u = { .ip4_u =
-                                             { .daddr = icmp_param.replyopts.faddr,
-                                               .saddr = saddr,
-                                               .tos = RT_TOS(tos) } },
-                                   .proto = IPPROTO_ICMP };
-               ip_rt_put(rt);
+
+       {
+               struct flowi fl = {
+                       .nl_u = {
+                               .ip4_u = {
+                                       .daddr = icmp_param.replyopts.srr ?
+                                               icmp_param.replyopts.faddr :
+                                               iph->saddr,
+                                       .saddr = saddr,
+                                       .tos = RT_TOS(tos)
+                               }
+                       },
+                       .proto = IPPROTO_ICMP,
+                       .uli_u = {
+                               .icmpt = {
+                                       .type = type,
+                                       .code = code
+                               }
+                       }
+               };
+               security_skb_classify_flow(skb_in, &fl);
                if (ip_route_output_key(&rt, &fl))
                        goto out_unlock;
        }
@@ -558,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 
        /* RFC says return as much as we can without exceeding 576 bytes. */
 
-       room = dst_pmtu(&rt->u.dst);
+       room = dst_mtu(&rt->u.dst);
        if (room > 576)
                room = 576;
        room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -615,11 +628,10 @@ static void icmp_unreach(struct sk_buff *skb)
                        break;
                case ICMP_FRAG_NEEDED:
                        if (ipv4_config.no_pmtu_disc) {
-                               LIMIT_NETDEBUG(
-                                       printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
+                               LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
                                                         "fragmentation needed "
                                                         "and DF set.\n",
-                                              NIPQUAD(iph->daddr)));
+                                              NIPQUAD(iph->daddr));
                        } else {
                                info = ip_rt_frag_needed(iph,
                                                     ntohs(icmph->un.frag.mtu));
@@ -628,10 +640,9 @@ static void icmp_unreach(struct sk_buff *skb)
                        }
                        break;
                case ICMP_SR_FAILED:
-                       LIMIT_NETDEBUG(
-                               printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
+                       LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
                                                 "Route Failed.\n",
-                                      NIPQUAD(iph->daddr)));
+                                      NIPQUAD(iph->daddr));
                        break;
                default:
                        break;
@@ -720,7 +731,6 @@ out_err:
 static void icmp_redirect(struct sk_buff *skb)
 {
        struct iphdr *iph;
-       unsigned long ip;
 
        if (skb->len < sizeof(struct iphdr))
                goto out_err;
@@ -732,7 +742,6 @@ static void icmp_redirect(struct sk_buff *skb)
                goto out;
 
        iph = (struct iphdr *)skb->data;
-       ip = iph->daddr;
 
        switch (skb->h.icmph->code & 7) {
        case ICMP_REDIR_NET:
@@ -742,8 +751,9 @@ static void icmp_redirect(struct sk_buff *skb)
                 */
        case ICMP_REDIR_HOST:
        case ICMP_REDIR_HOSTTOS:
-               ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway,
-                              iph->saddr, iph->tos, skb->dev);
+               ip_rt_redirect(skb->nh.iph->saddr, iph->daddr,
+                              skb->h.icmph->un.gateway,
+                              iph->saddr, skb->dev);
                break;
        }
 out:
@@ -885,11 +895,10 @@ static void icmp_address_reply(struct sk_buff *skb)
        if (in_dev->ifa_list &&
            IN_DEV_LOG_MARTIANS(in_dev) &&
            IN_DEV_FORWARD(in_dev)) {
-               u32 _mask, *mp;
+               __be32 _mask, *mp;
 
                mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
-               if (mp == NULL)
-                       BUG();
+               BUG_ON(mp == NULL);
                for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
                        if (*mp == ifa->ifa_mask &&
                            inet_ifa_match(rt->rt_src, ifa))
@@ -921,15 +930,14 @@ int icmp_rcv(struct sk_buff *skb)
        ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
        switch (skb->ip_summed) {
-       case CHECKSUM_HW:
-               if (!(u16)csum_fold(skb->csum))
+       case CHECKSUM_COMPLETE:
+               if (!csum_fold(skb->csum))
                        break;
-               NETDEBUG(if (net_ratelimit())
-                               printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
+               /* fall through */
        case CHECKSUM_NONE:
-               if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+               skb->csum = 0;
+               if (__skb_checksum_complete(skb))
                        goto error;
-       default:;
        }
 
        if (!pskb_pull(skb, sizeof(struct icmphdr)))
@@ -958,7 +966,8 @@ int icmp_rcv(struct sk_buff *skb)
                 *      RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
                 *        discarded if to broadcast/multicast.
                 */
-               if (icmph->type == ICMP_ECHO &&
+               if ((icmph->type == ICMP_ECHO ||
+                    icmph->type == ICMP_TIMESTAMP) &&
                    sysctl_icmp_echo_ignore_broadcasts) {
                        goto error;
                }
@@ -984,7 +993,7 @@ error:
 /*
  *     This table is the definition of how we handle ICMP.
  */
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
        [ICMP_ECHOREPLY] = {
                .output_entry = ICMP_MIB_OUTECHOREPS,
                .input_entry = ICMP_MIB_INECHOREPS,
@@ -1095,15 +1104,12 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 
 void __init icmp_init(struct net_proto_family *ops)
 {
-       struct inet_opt *inet;
+       struct inet_sock *inet;
        int i;
 
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_possible_cpu(i) {
                int err;
 
-               if (!cpu_possible(i))
-                       continue;
-
                err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
                                       &per_cpu(__icmp_socket, i));