#include <linux/udp.h>
#include <linux/jhash.h>
-#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
-#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
-
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
#define DEBUGP printk
#define DEBUGP(format, args...)
#endif
-DECLARE_RWLOCK(ip_nat_lock);
+DEFINE_RWLOCK(ip_nat_lock);
/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;
static struct list_head *bysource;
-struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+#define MAX_IP_NAT_PROTO 256
+static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+
+static inline struct ip_nat_protocol *
+__ip_nat_proto_find(u_int8_t protonum)
+{
+ return ip_nat_protos[protonum];
+}
+
+struct ip_nat_protocol *
+ip_nat_proto_find_get(u_int8_t protonum)
+{
+ struct ip_nat_protocol *p;
+
+ /* we need to disable preemption to make sure 'p' doesn't get
+ * removed until we've grabbed the reference */
+ preempt_disable();
+ p = __ip_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &ip_nat_unknown_protocol;
+ preempt_enable();
+
+ return p;
+}
+EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
+
+void
+ip_nat_proto_put(struct ip_nat_protocol *p)
+{
+ module_put(p->me);
+}
+EXPORT_SYMBOL_GPL(ip_nat_proto_put);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct ip_conntrack_tuple *tuple)
{
/* Original src, to ensure we map it consistently if poss. */
- return jhash_3words(tuple->src.ip, tuple->src.u.all,
+ return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
tuple->dst.protonum, 0) % ip_nat_htable_size;
}
if (!(conn->status & IPS_NAT_DONE_MASK))
return;
- WRITE_LOCK(&ip_nat_lock);
+ write_lock_bh(&ip_nat_lock);
list_del(&conn->nat.info.bysource);
- WRITE_UNLOCK(&ip_nat_lock);
-}
-
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong. Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
+ write_unlock_bh(&ip_nat_lock);
}
/* Is this tuple already taken? (not by us) */
invert_tuplepr(&reply, tuple);
return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
+EXPORT_SYMBOL(ip_nat_used_tuple);
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range. */
in_range(const struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range)
{
- struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum);
+ struct ip_nat_protocol *proto =
+ __ip_nat_proto_find(tuple->dst.protonum);
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
unsigned int h = hash_by_src(tuple);
struct ip_conntrack *ct;
- READ_LOCK(&ip_nat_lock);
+ read_lock_bh(&ip_nat_lock);
list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
result->dst = tuple->dst;
if (in_range(result, range)) {
- READ_UNLOCK(&ip_nat_lock);
+ read_unlock_bh(&ip_nat_lock);
return 1;
}
}
}
- READ_UNLOCK(&ip_nat_lock);
+ read_unlock_bh(&ip_nat_lock);
return 0;
}
const struct ip_conntrack *conntrack,
enum ip_nat_manip_type maniptype)
{
- u_int32_t *var_ipp;
+ __be32 *var_ipp;
/* Host order */
u_int32_t minip, maxip, j;
* like this), even across reboots. */
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
- j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
+ j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
*var_ipp = htonl(minip + j % (maxip - minip + 1));
}
struct ip_conntrack *conntrack,
enum ip_nat_manip_type maniptype)
{
- struct ip_nat_protocol *proto
- = ip_nat_find_proto(orig_tuple->dst.protonum);
+ struct ip_nat_protocol *proto;
/* 1) If this srcip/proto/src-proto-part is currently mapped,
and that same mapping gives a unique tuple within the given
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
+ proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
+
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, maniptype, &range->min, &range->max))
- && !ip_nat_used_tuple(tuple, conntrack))
+ && !ip_nat_used_tuple(tuple, conntrack)) {
+ ip_nat_proto_put(proto);
return;
+ }
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, conntrack);
+
+ ip_nat_proto_put(proto);
}
unsigned int
unsigned int srchash
= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple);
- WRITE_LOCK(&ip_nat_lock);
+ write_lock_bh(&ip_nat_lock);
list_add(&info->bysource, &bysource[srchash]);
- WRITE_UNLOCK(&ip_nat_lock);
+ write_unlock_bh(&ip_nat_lock);
}
/* It's done. */
return NF_ACCEPT;
}
+EXPORT_SYMBOL(ip_nat_setup_info);
/* Returns true if succeeded. */
static int
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph;
+ struct ip_nat_protocol *p;
- (*pskb)->nfcache |= NFC_ALTERED;
- if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
+ if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
- if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
- target, maniptype))
+ p = ip_nat_proto_find_get(proto);
+ if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
+ ip_nat_proto_put(p);
return 0;
+ }
+ ip_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
- iph->check);
+ nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
iph->saddr = target->src.ip;
} else {
- iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
- iph->check);
+ nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
iph->daddr = target->dst.ip;
}
return 1;
}
/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
+unsigned int ip_nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
}
return NF_ACCEPT;
}
+EXPORT_SYMBOL_GPL(ip_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
struct {
struct icmphdr icmp;
} *inside;
struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned long statusbit;
+ enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
- if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
+ if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
- hdrlen = (*pskb)->nh.iph->ihl * 4;
- if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0)))
- return 0;
- }
+ if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ return 0;
/* Must be RELATED */
IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
- &inner, ip_ct_find_proto(inside->ip.protocol)))
+ &inner,
+ __ip_conntrack_proto_find(inside->ip.protocol)))
return 0;
/* Change inner back to look like incoming packet. We do the
!manip))
return 0;
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt inner. */
+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+ (*pskb)->len - hdrlen,
+ 0));
+ }
/* Change outer to look the reply to an incoming packet
* (proto 0 means don't invert per-proto part). */
+ if (manip == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
- /* Obviously, we need to NAT destination IP, but source IP
- should be NAT'ed only if it is from a NAT'd host.
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
- Explanation: some people use NAT for anonymizing. Also,
- CERT recommends dropping all packets from private IP
- addresses (although ICMP errors from internal links with
- such addresses are not too uncommon, as Alan Cox points
- out) */
- if (manip != IP_NAT_MANIP_SRC
- || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+ if (ct->status & statusbit) {
invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
if (!manip_pkt(0, pskb, 0, &target, manip))
return 0;
return 1;
}
+EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
/* Protocol registration. */
int ip_nat_protocol_register(struct ip_nat_protocol *proto)
{
int ret = 0;
- WRITE_LOCK(&ip_nat_lock);
+ write_lock_bh(&ip_nat_lock);
if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
ret = -EBUSY;
goto out;
}
ip_nat_protos[proto->protonum] = proto;
out:
- WRITE_UNLOCK(&ip_nat_lock);
+ write_unlock_bh(&ip_nat_lock);
return ret;
}
+EXPORT_SYMBOL(ip_nat_protocol_register);
/* Noone stores the protocol anywhere; simply delete it. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
- WRITE_LOCK(&ip_nat_lock);
+ write_lock_bh(&ip_nat_lock);
ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
- WRITE_UNLOCK(&ip_nat_lock);
+ write_unlock_bh(&ip_nat_lock);
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
}
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
+
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+int
+ip_nat_port_range_to_nfattr(struct sk_buff *skb,
+ const struct ip_nat_range *range)
+{
+ NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
+ &range->min.tcp.port);
+ NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
+ &range->max.tcp.port);
+
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
-int __init ip_nat_init(void)
+int
+ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
+{
+ int ret = 0;
+
+ /* we have to return whether we actually parsed something or not */
+
+ if (tb[CTA_PROTONAT_PORT_MIN-1]) {
+ ret = 1;
+ range->min.tcp.port =
+ *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
+ }
+
+ if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
+ if (ret)
+ range->max.tcp.port = range->min.tcp.port;
+ } else {
+ ret = 1;
+ range->max.tcp.port =
+ *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
+EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
+#endif
+
+static int __init ip_nat_init(void)
{
size_t i;
return -ENOMEM;
/* Sew in builtin protocols. */
- WRITE_LOCK(&ip_nat_lock);
+ write_lock_bh(&ip_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
ip_nat_protos[i] = &ip_nat_unknown_protocol;
ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
- WRITE_UNLOCK(&ip_nat_lock);
+ write_unlock_bh(&ip_nat_lock);
for (i = 0; i < ip_nat_htable_size; i++) {
INIT_LIST_HEAD(&bysource[i]);
return 0;
}
-/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
-void ip_nat_cleanup(void)
+static void __exit ip_nat_cleanup(void)
{
ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
vfree(bysource);
}
+
+MODULE_LICENSE("GPL");
+
+module_init(ip_nat_init);
+module_exit(ip_nat_cleanup);