X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_nat_helper.c;h=ee80feb4b2a986a5fa5526834a50f458f5d23459;hb=refs%2Fheads%2Fvserver;hp=a49c722adbc195f8647f9b1228b272444774ffd8;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index a49c722ad..ee80feb4b 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -15,7 +15,6 @@ * - make ip_nat_resize_packet more generic (TCP and UDP) * - add ip_nat_mangle_udp_packet */ -#include #include #include #include @@ -28,16 +27,12 @@ #include #include -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) - #include #include #include #include #include #include -#include #if 0 #define DEBUGP printk @@ -47,7 +42,7 @@ #define DUMP_OFFSET(x) #endif -DECLARE_LOCK(ip_nat_seqofs_lock); +static DEFINE_SPINLOCK(ip_nat_seqofs_lock); /* Setup TCP sequence correction given this change at this sequence */ static inline void @@ -70,9 +65,9 @@ adjust_tcp_sequence(u32 seq, DEBUGP("ip_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); - LOCK_BH(&ip_nat_seqofs_lock); + spin_lock_bh(&ip_nat_seqofs_lock); - /* SYN adjust. If it's uninitialized, of this is after last + /* SYN adjust. If it's uninitialized, or this is after last * correction, record it: we don't handle more than one * adjustment in the window, but do deal with common case of a * retransmit */ @@ -82,7 +77,7 @@ adjust_tcp_sequence(u32 seq, this_way->offset_before = this_way->offset_after; this_way->offset_after += sizediff; } - UNLOCK_BH(&ip_nat_seqofs_lock); + spin_unlock_bh(&ip_nat_seqofs_lock); DEBUGP("ip_nat_resize_packet: Seq_offset after: "); DUMP_OFFSET(this_way); @@ -142,9 +137,6 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) /* Transfer socket to new skb. */ if ((*pskb)->sk) skb_set_owner_w(nskb, (*pskb)->sk); -#ifdef CONFIG_NETFILTER_DEBUG - nskb->nf_debug = (*pskb)->nf_debug; -#endif kfree_skb(*pskb); *pskb = nskb; return 1; @@ -169,9 +161,9 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int datalen; + int oldlen, datalen; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -184,19 +176,32 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); - - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldlen), htons(datalen), 1); + + if (rep_len != match_len) { + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(tcph->seq), + (int)rep_len - (int)match_len, + ct, ctinfo); + /* Tell TCP window tracking about seq change */ + ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo)); + } return 1; } +EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX @@ -219,6 +224,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; + int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -226,7 +232,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, match_offset + match_len) return 0; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -236,24 +242,33 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - } - + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } else + nf_proto_csum_replace2(&udph->check, *pskb, + htons(oldlen), htons(datalen), 1); return 1; } +EXPORT_SYMBOL(ip_nat_mangle_udp_packet); /* Adjust one found SACK option including checksum correction */ static void @@ -264,37 +279,34 @@ sack_adjust(struct sk_buff *skb, struct ip_nat_seq *natseq) { while (sackoff < sackend) { - struct tcp_sack_block *sack; - u_int32_t new_start_seq, new_end_seq; + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; if (after(ntohl(sack->start_seq) - natseq->offset_before, natseq->correction_pos)) - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_after; + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); else - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_before; - new_start_seq = htonl(new_start_seq); + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); if (after(ntohl(sack->end_seq) - natseq->offset_before, natseq->correction_pos)) - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_after; + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); else - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_before; - new_end_seq = htonl(new_end_seq); + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); + nf_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + nf_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -313,7 +325,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb, optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - if (!skb_ip_make_writable(pskb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); @@ -346,14 +358,15 @@ ip_nat_sack_adjust(struct sk_buff **pskb, return 1; } -/* TCP sequence number adjustment. Returns true or false. */ +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int ip_nat_seq_adjust(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { struct tcphdr *tcph; - int dir, newseq, newack; + int dir; + __be32 newseq, newack; struct ip_nat_seq *this_way, *other_way; dir = CTINFO2DIR(ctinfo); @@ -361,32 +374,23 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - /* No adjustments to make? Very common case. */ - if (!this_way->offset_before && !this_way->offset_after - && !other_way->offset_before && !other_way->offset_after) - return 1; - - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = ntohl(tcph->seq) + this_way->offset_after; + newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else - newseq = ntohl(tcph->seq) + this_way->offset_before; - newseq = htonl(newseq); + newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = ntohl(tcph->ack_seq) - other_way->offset_after; + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); else - newack = ntohl(tcph->ack_seq) - other_way->offset_before; - newack = htonl(newack); + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), @@ -395,60 +399,38 @@ ip_nat_seq_adjust(struct sk_buff **pskb, tcph->seq = newseq; tcph->ack_seq = newack; - return ip_nat_sack_adjust(pskb, tcph, ct, ctinfo); -} - -static inline int -helper_cmp(const struct ip_nat_helper *helper, - const struct ip_conntrack_tuple *tuple) -{ - return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); -} - -int ip_nat_helper_register(struct ip_nat_helper *me) -{ - int ret = 0; - - WRITE_LOCK(&ip_nat_lock); - if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) - ret = -EBUSY; - else - list_prepend(&helpers, me); - WRITE_UNLOCK(&ip_nat_lock); - - return ret; -} - -static int -kill_helper(const struct ip_conntrack *i, void *helper) -{ - int ret; + if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo)) + return 0; - READ_LOCK(&ip_nat_lock); - ret = (i->nat.info.helper == helper); - READ_UNLOCK(&ip_nat_lock); + ip_conntrack_tcp_update(*pskb, ct, dir); - return ret; + return 1; } +EXPORT_SYMBOL(ip_nat_seq_adjust); -void ip_nat_helper_unregister(struct ip_nat_helper *me) +/* Setup NAT on this expected conntrack so it follows master. */ +/* If we fail to get a free NAT slot, we'll get dropped on confirm */ +void ip_nat_follow_master(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) { - WRITE_LOCK(&ip_nat_lock); - /* Autoloading conntrack helper might have failed */ - if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) { - LIST_DELETE(&helpers, me); - } - WRITE_UNLOCK(&ip_nat_lock); - - /* Someone could be still looking at the helper in a bh. */ - synchronize_net(); - - /* Find anything using it, and umm, kill them. We can't turn - them into normal connections: if we've adjusted SYNs, then - they'll ackstorm. So we just drop it. We used to just - bump module count when a connection existed, but that - forces admins to gen fake RSTs or bounce box, either of - which is just a long-winded way of making things - worse. --RR */ - ip_ct_selective_cleanup(kill_helper, me); + struct ip_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.ip; + /* hook doesn't matter, but it has to do source manip */ + ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.src.ip; + /* hook doesn't matter, but it has to do destination manip */ + ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } +EXPORT_SYMBOL(ip_nat_follow_master);