X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_nat_core.c;h=9d1a5175dcd40bb0bd7b6e4fcdd8d914c08fc56e;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=9fc6f93af0dd15de8933a14e053bf8cdf41dfb69;hpb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 9fc6f93af..9d1a5175d 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,9 +22,6 @@ #include #include -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) - #include #include #include @@ -33,7 +30,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk @@ -41,21 +37,52 @@ #define DEBUGP(format, args...) #endif -DECLARE_RWLOCK(ip_nat_lock); +DEFINE_RWLOCK(ip_nat_lock); /* Calculated at init based on memory size */ static unsigned int ip_nat_htable_size; static struct list_head *bysource; -struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; +#define MAX_IP_NAT_PROTO 256 +static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + +static inline struct ip_nat_protocol * +__ip_nat_proto_find(u_int8_t protonum) +{ + return ip_nat_protos[protonum]; +} + +struct ip_nat_protocol * +ip_nat_proto_find_get(u_int8_t protonum) +{ + struct ip_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __ip_nat_proto_find(protonum); + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; + preempt_enable(); + + return p; +} +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); + +void +ip_nat_proto_put(struct ip_nat_protocol *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int hash_by_src(const struct ip_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words(tuple->src.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all, tuple->dst.protonum, 0) % ip_nat_htable_size; } @@ -65,20 +92,9 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) if (!(conn->status & IPS_NAT_DONE_MASK)) return; - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); list_del(&conn->nat.info.bysource); - WRITE_UNLOCK(&ip_nat_lock); -} - -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); + write_unlock_bh(&ip_nat_lock); } /* Is this tuple already taken? (not by us) */ @@ -96,6 +112,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -103,7 +120,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); + struct ip_nat_protocol *proto = + __ip_nat_proto_find(tuple->dst.protonum); /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -142,7 +160,7 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, unsigned int h = hash_by_src(tuple); struct ip_conntrack *ct; - READ_LOCK(&ip_nat_lock); + read_lock_bh(&ip_nat_lock); list_for_each_entry(ct, &bysource[h], nat.info.bysource) { if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -151,12 +169,12 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, result->dst = tuple->dst; if (in_range(result, range)) { - READ_UNLOCK(&ip_nat_lock); + read_unlock_bh(&ip_nat_lock); return 1; } } } - READ_UNLOCK(&ip_nat_lock); + read_unlock_bh(&ip_nat_lock); return 0; } @@ -172,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - u_int32_t *var_ipp; + __be32 *var_ipp; /* Host order */ u_int32_t minip, maxip, j; @@ -199,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * like this), even across reboots. */ minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); - j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0); + j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0); *var_ipp = htonl(minip + j % (maxip - minip + 1)); } @@ -216,8 +234,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - struct ip_nat_protocol *proto - = ip_nat_find_proto(orig_tuple->dst.protonum); + struct ip_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,14 +259,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ + proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ip_nat_proto_put(proto); return; + } /* Last change: get protocol to try to obtain unique tuple. */ proto->unique_tuple(tuple, range, maniptype, conntrack); + + ip_nat_proto_put(proto); } unsigned int @@ -297,9 +320,9 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, unsigned int srchash = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple); - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); list_add(&info->bysource, &bysource[srchash]); - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); } /* It's done. */ @@ -310,6 +333,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -320,37 +344,38 @@ manip_pkt(u_int16_t proto, enum ip_nat_manip_type maniptype) { struct iphdr *iph; + struct ip_nat_protocol *p; - (*pskb)->nfcache |= NFC_ALTERED; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - target, maniptype)) + p = ip_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + ip_nat_proto_put(p); return 0; + } + ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); + nf_csum_replace4(&iph->check, iph->saddr, target->src.ip); iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); + nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip); iph->daddr = target->dst.ip; } return 1; } /* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -377,12 +402,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -390,20 +416,19 @@ int icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned long statusbit; + enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -426,7 +451,8 @@ int icmp_reply_translation(struct sk_buff **pskb, if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, ip_ct_find_proto(inside->ip.protocol))) + &inner, + __ip_conntrack_proto_find(inside->ip.protocol))) return 0; /* Change inner back to look like incoming packet. We do the @@ -441,26 +467,27 @@ int icmp_reply_translation(struct sk_buff **pskb, !manip)) return 0; - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + } /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ + if (manip == IP_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; - /* Obviously, we need to NAT destination IP, but source IP - should be NAT'ed only if it is from a NAT'd host. + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; - Explanation: some people use NAT for anonymizing. Also, - CERT recommends dropping all packets from private IP - addresses (although ICMP errors from internal links with - such addresses are not too uncommon, as Alan Cox points - out) */ - if (manip != IP_NAT_MANIP_SRC - || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) { + if (ct->status & statusbit) { invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); if (!manip_pkt(0, pskb, 0, &target, manip)) return 0; @@ -468,35 +495,83 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) { int ret = 0; - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { ret = -EBUSY; goto out; } ip_nat_protos[proto->protonum] = proto; out: - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) { - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); + +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} -int __init ip_nat_init(void) +int +ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); +#endif + +static int __init ip_nat_init(void) { size_t i; @@ -509,13 +584,13 @@ int __init ip_nat_init(void) return -ENOMEM; /* Sew in builtin protocols. */ - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) ip_nat_protos[i] = &ip_nat_unknown_protocol; ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); for (i = 0; i < ip_nat_htable_size; i++) { INIT_LIST_HEAD(&bysource[i]); @@ -538,10 +613,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup);