X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Ficmp.c;h=40cf0d0e1b83c21d4f2ba675aea2b764ab027550;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=4a1a8e21d30ed0f6b7d3e132b49055fed2a90b69;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4a1a8e21d..40cf0d0e1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -64,7 +64,6 @@ * */ -#include #include #include #include @@ -73,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -104,7 +104,7 @@ struct icmp_bxm { struct { struct icmphdr icmph; - __u32 times[3]; + __be32 times[3]; } data; int head_len; struct ip_options replyopts; @@ -114,7 +114,7 @@ struct icmp_bxm { /* * Statistics */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); +DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { }; /* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts; +int sysctl_icmp_echo_ignore_all __read_mostly; +int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses; +int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; /* * Configurable global rate limit. @@ -205,21 +205,22 @@ int sysctl_icmp_ignore_bogus_error_responses; * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit = 1 * HZ; -int sysctl_icmp_ratemask = 0x1818; +int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; +int sysctl_icmp_ratemask __read_mostly = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; /* * ICMP control array. This specifies what to do with each ICMP. */ struct icmp_control { - int output_off; /* Field offset for increment on output */ - int input_off; /* Field offset for increment on input */ + int output_entry; /* Field for increment on output */ + int input_entry; /* Field for increment on input */ void (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; -static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; +static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; /* * The ICMP socket(s). This is the most convenient way to flow control @@ -318,8 +319,8 @@ out: static void icmp_out_count(int type) { if (type <= NR_ICMP_TYPES) { - ICMP_INC_STATS_FIELD(icmp_pointers[type].output_off); - ICMP_INC_STATS(IcmpOutMsgs); + ICMP_INC_STATS(icmp_pointers[type].output_entry); + ICMP_INC_STATS(ICMP_MIB_OUTMSGS); } } @@ -327,17 +328,19 @@ static void icmp_out_count(int type) * Checksum each fragment, and on the first include the headers and final * checksum. */ -int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, - struct sk_buff *skb) +static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) { struct icmp_bxm *icmp_param = (struct icmp_bxm *)from; - unsigned int csum; + __wsum csum; csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset + offset, to, len, 0); skb->csum = csum_block_add(skb->csum, csum, odd); + if (icmp_pointers[icmp_param->data.icmph.type].error) + nf_ct_attach(skb, icmp_param->skb); return 0; } @@ -346,14 +349,14 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, { struct sk_buff *skb; - ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, - icmp_param->data_len+icmp_param->head_len, - icmp_param->head_len, - ipc, rt, MSG_DONTWAIT); - - if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + icmp_param->data_len+icmp_param->head_len, + icmp_param->head_len, + ipc, rt, MSG_DONTWAIT) < 0) + ip_flush_pending_frames(icmp_socket->sk); + else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = skb->h.icmph; - unsigned int csum = 0; + __wsum csum = 0; struct sk_buff *skb1; skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { @@ -375,13 +378,13 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct sock *sk = icmp_socket->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; - u32 daddr; + __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) - goto out; + return; if (icmp_xmit_lock()) return; @@ -403,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -412,7 +416,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ip_rt_put(rt); out_unlock: icmp_xmit_unlock(); -out:; } @@ -427,14 +430,14 @@ out:; * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) +void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; struct ipcm_cookie ipc; - u32 saddr; + __be32 saddr; u8 tos; if (!rt) @@ -478,20 +481,25 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) * ICMP error */ if (iph->protocol == IPPROTO_ICMP) { - u8 inner_type; - - if (skb_copy_bits(skb_in, - skb_in->nh.raw + (iph->ihl << 2) + - offsetof(struct icmphdr, type) - - skb_in->data, &inner_type, 1)) + u8 _inner_type, *itp; + + itp = skb_header_pointer(skb_in, + skb_in->nh.raw + + (iph->ihl << 2) + + offsetof(struct icmphdr, + type) - + skb_in->data, + sizeof(_inner_type), + &_inner_type); + if (itp == NULL) goto out; /* * Assume any unknown ICMP type is an error. This * isn't specified by the RFC, but think about it.. */ - if (inner_type > NR_ICMP_TYPES || - icmp_pointers[inner_type].error) + if (*itp > NR_ICMP_TYPES || + icmp_pointers[*itp].error) goto out; } } @@ -503,35 +511,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) * Construct source address and options. */ -#ifdef CONFIG_IP_ROUTE_NAT - /* - * Restore original addresses if packet has been translated. - */ - if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) { - iph->daddr = rt->fl.fl4_dst; - iph->saddr = rt->fl.fl4_src; - } -#endif - saddr = iph->daddr; - if (!(rt->rt_flags & RTCF_LOCAL)) - saddr = 0; + if (!(rt->rt_flags & RTCF_LOCAL)) { + if (sysctl_icmp_errors_use_inbound_ifaddr) + saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK); + else + saddr = 0; + } tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; - { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr, - .saddr = saddr, - .tos = RT_TOS(tos) } }, - .proto = IPPROTO_ICMP }; - - if (ip_route_output_key(&rt, &fl)) - goto out_unlock; - } if (ip_options_echo(&icmp_param.replyopts, skb_in)) - goto ende; + goto out_unlock; /* @@ -548,13 +541,27 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) inet_sk(icmp_socket->sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; - if (icmp_param.replyopts.srr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = icmp_param.replyopts.faddr, - .saddr = saddr, - .tos = RT_TOS(tos) } }, - .proto = IPPROTO_ICMP }; - ip_rt_put(rt); + + { + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = icmp_param.replyopts.srr ? + icmp_param.replyopts.faddr : + iph->saddr, + .saddr = saddr, + .tos = RT_TOS(tos) + } + }, + .proto = IPPROTO_ICMP, + .uli_u = { + .icmpt = { + .type = type, + .code = code + } + } + }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -564,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) /* RFC says return as much as we can without exceeding 576 bytes. */ - room = dst_pmtu(&rt->u.dst); + room = dst_mtu(&rt->u.dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; @@ -593,7 +600,7 @@ static void icmp_unreach(struct sk_buff *skb) struct iphdr *iph; struct icmphdr *icmph; int hash, protocol; - struct inet_protocol *ipprot; + struct net_protocol *ipprot; struct sock *raw_sk; u32 info = 0; @@ -621,11 +628,10 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); } else { info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); @@ -634,10 +640,9 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " "Route Failed.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); break; default: break; @@ -706,8 +711,7 @@ static void icmp_unreach(struct sk_buff *skb) read_unlock(&raw_v4_lock); rcu_read_lock(); - ipprot = inet_protos[hash]; - smp_read_barrier_depends(); + ipprot = rcu_dereference(inet_protos[hash]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); rcu_read_unlock(); @@ -715,7 +719,7 @@ static void icmp_unreach(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); goto out; } @@ -727,7 +731,6 @@ out_err: static void icmp_redirect(struct sk_buff *skb) { struct iphdr *iph; - unsigned long ip; if (skb->len < sizeof(struct iphdr)) goto out_err; @@ -739,7 +742,6 @@ static void icmp_redirect(struct sk_buff *skb) goto out; iph = (struct iphdr *)skb->data; - ip = iph->daddr; switch (skb->h.icmph->code & 7) { case ICMP_REDIR_NET: @@ -749,14 +751,15 @@ static void icmp_redirect(struct sk_buff *skb) */ case ICMP_REDIR_HOST: case ICMP_REDIR_HOSTTOS: - ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, - iph->saddr, iph->tos, skb->dev); + ip_rt_redirect(skb->nh.iph->saddr, iph->daddr, + skb->h.icmph->un.gateway, + iph->saddr, skb->dev); break; } out: return; out_err: - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); goto out; } @@ -824,7 +827,7 @@ static void icmp_timestamp(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); goto out; } @@ -881,7 +884,6 @@ static void icmp_address_reply(struct sk_buff *skb) struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; - u32 mask; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) goto out; @@ -889,24 +891,26 @@ static void icmp_address_reply(struct sk_buff *skb) in_dev = in_dev_get(dev); if (!in_dev) goto out; - read_lock(&in_dev->lock); + rcu_read_lock(); if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { - if (skb_copy_bits(skb, 0, &mask, 4)) - BUG(); + __be32 _mask, *mp; + + mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); + BUG_ON(mp == NULL); for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - if (mask == ifa->ifa_mask && + if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) break; } if (!ifa && net_ratelimit()) { printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " "%s/%u.%u.%u.%u\n", - NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src)); + NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); } } - read_unlock(&in_dev->lock); + rcu_read_unlock(); in_dev_put(in_dev); out:; } @@ -923,18 +927,17 @@ int icmp_rcv(struct sk_buff *skb) struct icmphdr *icmph; struct rtable *rt = (struct rtable *)skb->dst; - ICMP_INC_STATS_BH(IcmpInMsgs); + ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_HW: - if (!(u16)csum_fold(skb->csum)) + case CHECKSUM_COMPLETE: + if (!csum_fold(skb->csum)) break; - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + /* fall through */ case CHECKSUM_NONE: - if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) + skb->csum = 0; + if (__skb_checksum_complete(skb)) goto error; - default:; } if (!pskb_pull(skb, sizeof(struct icmphdr))) @@ -963,7 +966,8 @@ int icmp_rcv(struct sk_buff *skb) * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently * discarded if to broadcast/multicast. */ - if (icmph->type == ICMP_ECHO && + if ((icmph->type == ICMP_ECHO || + icmph->type == ICMP_TIMESTAMP) && sysctl_icmp_echo_ignore_broadcasts) { goto error; } @@ -975,140 +979,137 @@ int icmp_rcv(struct sk_buff *skb) } } - ICMP_INC_STATS_BH_FIELD(icmp_pointers[icmph->type].input_off); + ICMP_INC_STATS_BH(icmp_pointers[icmph->type].input_entry); icmp_pointers[icmph->type].handler(skb); drop: kfree_skb(skb); return 0; error: - ICMP_INC_STATS_BH(IcmpInErrors); + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); goto drop; } /* * This table is the definition of how we handle ICMP. */ -static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { +static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { [ICMP_ECHOREPLY] = { - .output_off = offsetof(struct icmp_mib, IcmpOutEchoReps), - .input_off = offsetof(struct icmp_mib, IcmpInEchoReps), + .output_entry = ICMP_MIB_OUTECHOREPS, + .input_entry = ICMP_MIB_INECHOREPS, .handler = icmp_discard, }, [1] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib,IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [2] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib,IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_DEST_UNREACH] = { - .output_off = offsetof(struct icmp_mib, IcmpOutDestUnreachs), - .input_off = offsetof(struct icmp_mib, IcmpInDestUnreachs), + .output_entry = ICMP_MIB_OUTDESTUNREACHS, + .input_entry = ICMP_MIB_INDESTUNREACHS, .handler = icmp_unreach, .error = 1, }, [ICMP_SOURCE_QUENCH] = { - .output_off = offsetof(struct icmp_mib, IcmpOutSrcQuenchs), - .input_off = offsetof(struct icmp_mib, IcmpInSrcQuenchs), + .output_entry = ICMP_MIB_OUTSRCQUENCHS, + .input_entry = ICMP_MIB_INSRCQUENCHS, .handler = icmp_unreach, .error = 1, }, [ICMP_REDIRECT] = { - .output_off = offsetof(struct icmp_mib, IcmpOutRedirects), - .input_off = offsetof(struct icmp_mib, IcmpInRedirects), + .output_entry = ICMP_MIB_OUTREDIRECTS, + .input_entry = ICMP_MIB_INREDIRECTS, .handler = icmp_redirect, .error = 1, }, [6] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [7] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_ECHO] = { - .output_off = offsetof(struct icmp_mib, IcmpOutEchos), - .input_off = offsetof(struct icmp_mib, IcmpInEchos), + .output_entry = ICMP_MIB_OUTECHOS, + .input_entry = ICMP_MIB_INECHOS, .handler = icmp_echo, }, [9] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [10] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, IcmpInErrors), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_INERRORS, .handler = icmp_discard, .error = 1, }, [ICMP_TIME_EXCEEDED] = { - .output_off = offsetof(struct icmp_mib, IcmpOutTimeExcds), - .input_off = offsetof(struct icmp_mib,IcmpInTimeExcds), + .output_entry = ICMP_MIB_OUTTIMEEXCDS, + .input_entry = ICMP_MIB_INTIMEEXCDS, .handler = icmp_unreach, .error = 1, }, [ICMP_PARAMETERPROB] = { - .output_off = offsetof(struct icmp_mib, IcmpOutParmProbs), - .input_off = offsetof(struct icmp_mib, IcmpInParmProbs), + .output_entry = ICMP_MIB_OUTPARMPROBS, + .input_entry = ICMP_MIB_INPARMPROBS, .handler = icmp_unreach, .error = 1, }, [ICMP_TIMESTAMP] = { - .output_off = offsetof(struct icmp_mib, IcmpOutTimestamps), - .input_off = offsetof(struct icmp_mib, IcmpInTimestamps), + .output_entry = ICMP_MIB_OUTTIMESTAMPS, + .input_entry = ICMP_MIB_INTIMESTAMPS, .handler = icmp_timestamp, }, [ICMP_TIMESTAMPREPLY] = { - .output_off = offsetof(struct icmp_mib, IcmpOutTimestampReps), - .input_off = offsetof(struct icmp_mib, IcmpInTimestampReps), + .output_entry = ICMP_MIB_OUTTIMESTAMPREPS, + .input_entry = ICMP_MIB_INTIMESTAMPREPS, .handler = icmp_discard, }, [ICMP_INFO_REQUEST] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, dummy), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_INFO_REPLY] = { - .output_off = offsetof(struct icmp_mib, dummy), - .input_off = offsetof(struct icmp_mib, dummy), + .output_entry = ICMP_MIB_DUMMY, + .input_entry = ICMP_MIB_DUMMY, .handler = icmp_discard, }, [ICMP_ADDRESS] = { - .output_off = offsetof(struct icmp_mib, IcmpOutAddrMasks), - .input_off = offsetof(struct icmp_mib, IcmpInAddrMasks), + .output_entry = ICMP_MIB_OUTADDRMASKS, + .input_entry = ICMP_MIB_INADDRMASKS, .handler = icmp_address, }, [ICMP_ADDRESSREPLY] = { - .output_off = offsetof(struct icmp_mib, IcmpOutAddrMaskReps), - .input_off = offsetof(struct icmp_mib, IcmpInAddrMaskReps), + .output_entry = ICMP_MIB_OUTADDRMASKREPS, + .input_entry = ICMP_MIB_INADDRMASKREPS, .handler = icmp_address_reply, }, }; void __init icmp_init(struct net_proto_family *ops) { - struct inet_opt *inet; + struct inet_sock *inet; int i; - for (i = 0; i < NR_CPUS; i++) { + for_each_possible_cpu(i) { int err; - if (!cpu_possible(i)) - continue; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &per_cpu(__icmp_socket, i));