X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_nat_core.c;fp=net%2Fipv4%2Fnetfilter%2Fip_nat_core.c;h=1741d555ad0dd5e29ab17305aa0ddc77065fd89e;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=9fc6f93af0dd15de8933a14e053bf8cdf41dfb69;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 9fc6f93af..1741d555a 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,8 +22,8 @@ #include #include -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) #include #include @@ -41,14 +41,45 @@ #define DEBUGP(format, args...) #endif -DECLARE_RWLOCK(ip_nat_lock); +DEFINE_RWLOCK(ip_nat_lock); /* Calculated at init based on memory size */ static unsigned int ip_nat_htable_size; static struct list_head *bysource; -struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; +#define MAX_IP_NAT_PROTO 256 +static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + +static inline struct ip_nat_protocol * +__ip_nat_proto_find(u_int8_t protonum) +{ + return ip_nat_protos[protonum]; +} + +struct ip_nat_protocol * +ip_nat_proto_find_get(u_int8_t protonum) +{ + struct ip_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __ip_nat_proto_find(protonum); + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; + preempt_enable(); + + return p; +} +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); + +void +ip_nat_proto_put(struct ip_nat_protocol *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -65,9 +96,9 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) if (!(conn->status & IPS_NAT_DONE_MASK)) return; - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); list_del(&conn->nat.info.bysource); - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); } /* We do checksum mangling, so if they were wrong before they're still @@ -80,6 +111,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -96,6 +128,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -103,7 +136,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); + struct ip_nat_protocol *proto = + __ip_nat_proto_find(tuple->dst.protonum); /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -142,7 +176,7 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, unsigned int h = hash_by_src(tuple); struct ip_conntrack *ct; - READ_LOCK(&ip_nat_lock); + read_lock_bh(&ip_nat_lock); list_for_each_entry(ct, &bysource[h], nat.info.bysource) { if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -151,12 +185,12 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, result->dst = tuple->dst; if (in_range(result, range)) { - READ_UNLOCK(&ip_nat_lock); + read_unlock_bh(&ip_nat_lock); return 1; } } } - READ_UNLOCK(&ip_nat_lock); + read_unlock_bh(&ip_nat_lock); return 0; } @@ -216,8 +250,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - struct ip_nat_protocol *proto - = ip_nat_find_proto(orig_tuple->dst.protonum); + struct ip_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,14 +275,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ + proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ip_nat_proto_put(proto); return; + } /* Last change: get protocol to try to obtain unique tuple. */ proto->unique_tuple(tuple, range, maniptype, conntrack); + + ip_nat_proto_put(proto); } unsigned int @@ -297,9 +336,9 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, unsigned int srchash = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple); - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); list_add(&info->bysource, &bysource[srchash]); - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); } /* It's done. */ @@ -310,6 +349,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -320,17 +360,20 @@ manip_pkt(u_int16_t proto, enum ip_nat_manip_type maniptype) { struct iphdr *iph; + struct ip_nat_protocol *p; - (*pskb)->nfcache |= NFC_ALTERED; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - target, maniptype)) + p = ip_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + ip_nat_proto_put(p); return 0; + } + ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; @@ -347,10 +390,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -377,12 +420,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -390,8 +434,9 @@ int icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + unsigned long statusbit; - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; @@ -426,7 +471,8 @@ int icmp_reply_translation(struct sk_buff **pskb, if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, ip_ct_find_proto(inside->ip.protocol))) + &inner, + __ip_conntrack_proto_find(inside->ip.protocol))) return 0; /* Change inner back to look like incoming packet. We do the @@ -450,17 +496,16 @@ int icmp_reply_translation(struct sk_buff **pskb, /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ + if (manip == IP_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; - /* Obviously, we need to NAT destination IP, but source IP - should be NAT'ed only if it is from a NAT'd host. + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; - Explanation: some people use NAT for anonymizing. Also, - CERT recommends dropping all packets from private IP - addresses (although ICMP errors from internal links with - such addresses are not too uncommon, as Alan Cox points - out) */ - if (manip != IP_NAT_MANIP_SRC - || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) { + if (ct->status & statusbit) { invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); if (!manip_pkt(0, pskb, 0, &target, manip)) return 0; @@ -468,35 +513,83 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) { int ret = 0; - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { ret = -EBUSY; goto out; } ip_nat_protos[proto->protonum] = proto; out: - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) { - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); + +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} + +int +ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); +#endif -int __init ip_nat_init(void) +static int __init ip_nat_init(void) { size_t i; @@ -509,13 +602,13 @@ int __init ip_nat_init(void) return -ENOMEM; /* Sew in builtin protocols. */ - WRITE_LOCK(&ip_nat_lock); + write_lock_bh(&ip_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) ip_nat_protos[i] = &ip_nat_unknown_protocol; ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; - WRITE_UNLOCK(&ip_nat_lock); + write_unlock_bh(&ip_nat_lock); for (i = 0; i < ip_nat_htable_size; i++) { INIT_LIST_HEAD(&bysource[i]); @@ -538,10 +631,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup);