1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
32 #include <linux/stddef.h>
33 #include <linux/sysctl.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/jhash.h>
37 /* For ERR_PTR(). Yeah, I know... --RR */
40 /* This rwlock protects the main hash table, protocol/helper/expected
41 registrations, conntrack timers*/
/* Lock-assertion hooks consumed by listhelp.h's list primitives: both map
 * onto assertions against the single global ip_conntrack_lock rwlock (the
 * argument x is ignored). */
42 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
43 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
45 #include <linux/netfilter_ipv4/ip_conntrack.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
47 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
48 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
49 #include <linux/netfilter_ipv4/listhelp.h>
51 #define IP_CONNTRACK_VERSION "2.1"
/* Debug printout stub: compiled out (expands to nothing) in this build. */
56 #define DEBUGP(format, args...)
/* Global locks: ip_conntrack_lock guards the hash table and the
 * protocol/helper/expectation registrations; the expect_tuple_lock
 * additionally guards expectation tuples against in-place rewrites
 * (see ip_conntrack_change_expect). */
59 DECLARE_RWLOCK(ip_conntrack_lock);
60 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* Optional callback invoked from destroy_conntrack (used by the NAT layer). */
62 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
/* Global list of pending expectations and of registered L4 protocols. */
63 LIST_HEAD(ip_conntrack_expect_list);
64 LIST_HEAD(protocol_list);
/* Registered conntrack helpers (ftp, irc, ...); file-local. */
65 static LIST_HEAD(helpers);
/* Hash table geometry and storage; sized at module init. */
66 unsigned int ip_conntrack_htable_size = 0;
68 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
69 struct list_head *ip_conntrack_hash;
70 static kmem_cache_t *ip_conntrack_cachep;
/* Dummy conntrack attached to packets marked NOTRACK. */
71 struct ip_conntrack ip_conntrack_untracked;
/* Fallback protocol handler, defined in ip_conntrack_proto_generic.c. */
73 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
/* LIST_FIND predicate: does this registered protocol entry match the
 * given L4 protocol number?
 * NOTE(review): this excerpt appears to have dropped the second parameter
 * line and the braces of this function — verify against the full file. */
75 static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
78 return protocol == curr->proto;
/* Look up the L4 protocol handler for `protocol`, falling back to the
 * generic handler when none is registered. Caller must hold at least a
 * read lock on ip_conntrack_lock (asserted below). */
81 struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
83 struct ip_conntrack_protocol *p;
85 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
86 p = LIST_FIND(&protocol_list, proto_cmpfn,
87 struct ip_conntrack_protocol *, protocol);
/* No registered handler: use the catch-all generic protocol. */
89 p = &ip_conntrack_generic_protocol;
/* Locking wrapper around __ip_ct_find_proto: takes the read lock for the
 * duration of the lookup. Never returns NULL (generic fallback). */
94 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
96 struct ip_conntrack_protocol *p;
98 READ_LOCK(&ip_conntrack_lock);
99 p = __ip_ct_find_proto(protocol);
100 READ_UNLOCK(&ip_conntrack_lock);
/* Drop one reference on a conntrack. Routed through infos[0] because
 * nf_conntrack_put() takes an nf_ct_info, not the conntrack itself. */
105 ip_conntrack_put(struct ip_conntrack *ct)
108 IP_NF_ASSERT(ct->infos[0].master);
109 /* nf_conntrack_put wants to go via an info struct, so feed it
111 nf_conntrack_put(&ct->infos[0]);
/* Per-boot random seed for the hash, initialized lazily in init_conntrack
 * to frustrate hash-bucket flooding attacks. */
114 static int ip_conntrack_hash_rnd_initted;
115 static unsigned int ip_conntrack_hash_rnd;
/* Map a connection tuple to a hash-table bucket index. Mixes both
 * addresses, both ports (src in low 16 bits, dst in high) and the
 * protocol number through jhash_3words, keyed with the random seed. */
118 hash_conntrack(const struct ip_conntrack_tuple *tuple)
123 return (jhash_3words(tuple->src.ip,
124 (tuple->dst.ip ^ tuple->dst.protonum),
125 (tuple->src.u.all | (tuple->dst.u.all << 16)),
126 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
/* Fill in `tuple` from an IP header plus the L4 data at `dataoff` in skb.
 * Returns nonzero on success; refuses non-head fragments, which carry no
 * L4 header to parse. The protocol handler fills the port/ID fields. */
130 get_tuple(const struct iphdr *iph,
131 const struct sk_buff *skb,
132 unsigned int dataoff,
133 struct ip_conntrack_tuple *tuple,
134 const struct ip_conntrack_protocol *protocol)
/* Never track fragments other than the first: no L4 header present. */
137 if (iph->frag_off & htons(IP_OFFSET)) {
138 printk("ip_conntrack_core: Frag of proto %u.\n",
143 tuple->src.ip = iph->saddr;
144 tuple->dst.ip = iph->daddr;
145 tuple->dst.protonum = iph->protocol;
/* Delegate the L4-specific part (ports, ICMP id/type...) to the handler. */
147 return protocol->pkt_to_tuple(skb, dataoff, tuple);
/* Build the reply-direction tuple for `orig` into `inverse`: swap the IP
 * addresses, keep the protocol number, and let the protocol handler invert
 * the L4 part. Returns the handler's success/failure. */
151 invert_tuple(struct ip_conntrack_tuple *inverse,
152 const struct ip_conntrack_tuple *orig,
153 const struct ip_conntrack_protocol *protocol)
155 inverse->src.ip = orig->dst.ip;
156 inverse->dst.ip = orig->src.ip;
157 inverse->dst.protonum = orig->dst.protonum;
159 return protocol->invert_tuple(inverse, orig);
163 /* ip_conntrack_expect helper functions */
165 /* Compare tuple parts depending on mask. */
/* LIST_FIND predicate: does `tuple` match this expectation's tuple under
 * the expectation's mask? Requires the expect-tuple read lock, because
 * ip_conntrack_change_expect can rewrite i->tuple in place. */
166 static inline int expect_cmp(const struct ip_conntrack_expect *i,
167 const struct ip_conntrack_tuple *tuple)
169 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
170 return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
/* Final teardown of an expectation once its refcount has hit zero and its
 * timer is no longer pending (both asserted). */
174 destroy_expect(struct ip_conntrack_expect *exp)
176 DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
177 IP_NF_ASSERT(atomic_read(&exp->use) == 0);
178 IP_NF_ASSERT(!timer_pending(&exp->timeout));
/* Drop one reference on an expectation; the last put destroys it. */
183 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
187 if (atomic_dec_and_test(&exp->use)) {
188 /* usage count dropped to zero */
/* Unlocked expectation lookup by tuple (masked compare). Caller must hold
 * read locks on both ip_conntrack_lock and the expect-tuple lock. */
193 static inline struct ip_conntrack_expect *
194 __ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
196 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
197 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
198 return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
199 struct ip_conntrack_expect *, tuple);
202 /* Find a expectation corresponding to a tuple. */
/* Locked lookup that returns the expectation with an extra reference the
 * caller must drop with ip_conntrack_expect_put(). */
203 struct ip_conntrack_expect *
204 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
206 struct ip_conntrack_expect *exp;
208 READ_LOCK(&ip_conntrack_lock);
209 READ_LOCK(&ip_conntrack_expect_tuple_lock);
210 exp = __ip_ct_expect_find(tuple);
/* NOTE(review): the excerpt drops a line here — upstream guards this
 * atomic_inc with `if (exp)`; verify against the full file. */
212 atomic_inc(&exp->use);
213 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
214 READ_UNLOCK(&ip_conntrack_lock);
219 /* remove one specific expectation from all lists and drop refcount,
220 * does _NOT_ delete the timer. */
221 static void __unexpect_related(struct ip_conntrack_expect *expect)
223 DEBUGP("unexpect_related(%p)\n", expect);
224 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
226 /* we're not allowed to unexpect a confirmed expectation! */
227 IP_NF_ASSERT(!expect->sibling);
229 /* delete from global and local lists */
230 list_del(&expect->list);
231 list_del(&expect->expected_list);
233 /* decrement expect-count of master conntrack */
/* expectant may already be NULL if the master died first
 * (see remove_expectations). */
234 if (expect->expectant)
235 expect->expectant->expecting--;
/* Drop the list's reference; may free the expectation. */
237 ip_conntrack_expect_put(expect);
240 /* remove one specific expecatation from all lists, drop refcount
242 * This function can _NOT_ be called for confirmed expects! */
/* Timer-aware variant: cancels the expectation timer first; if the timer
 * already fired we lost the race and the timeout handler will do the
 * __unexpect_related itself, so we must not do it twice. */
243 static void unexpect_related(struct ip_conntrack_expect *expect)
245 IP_NF_ASSERT(expect->expectant);
246 IP_NF_ASSERT(expect->expectant->helper);
247 /* if we are supposed to have a timer, but we can't delete
248 * it: race condition. __unexpect_related will
249 * be calledd by timeout function */
250 if (expect->expectant->helper->timeout
251 && !del_timer(&expect->timeout))
254 __unexpect_related(expect);
257 /* delete all unconfirmed expectations for this conntrack */
/* Walks ct->sibling_list. Established (confirmed) expectations are kept
 * but orphaned: their back-pointer to the dying master is dropped.
 * Unconfirmed ones are fully unexpected. drop_refcount selects whether the
 * master's reference on orphaned expectations is released. */
258 static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
260 struct list_head *exp_entry, *next;
261 struct ip_conntrack_expect *exp;
263 DEBUGP("remove_expectations(%p)\n", ct);
265 list_for_each_safe(exp_entry, next, &ct->sibling_list) {
266 exp = list_entry(exp_entry, struct ip_conntrack_expect,
269 /* we skip established expectations, as we want to delete
270 * the un-established ones only */
272 DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
274 /* Indicate that this expectations parent is dead */
275 ip_conntrack_put(exp->expectant);
276 exp->expectant = NULL;
281 IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
282 IP_NF_ASSERT(exp->expectant == ct);
284 /* delete expectation from global and private lists */
285 unexpect_related(exp);
/* Unhash a conntrack (both directions) and drop its pending expectations.
 * Caller must hold the write lock (asserted). */
290 clean_from_lists(struct ip_conntrack *ct)
294 DEBUGP("clean_from_lists(%p)\n", ct);
295 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
/* Each direction hashes to its own bucket; remove both entries. */
297 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
298 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
299 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
300 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
302 /* Destroy all un-established, pending expectations */
303 remove_expectations(ct, 1);
/* nf_conntrack destructor: runs when the last reference is dropped.
 * Calls the protocol's destroy hook and the NAT cleanup callback, detaches
 * any expectation that created this connection, releases the reference on
 * the master conntrack, and returns the object to the slab cache. */
307 destroy_conntrack(struct nf_conntrack *nfct)
309 struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
310 struct ip_conntrack_protocol *proto;
312 DEBUGP("destroy_conntrack(%p)\n", ct);
313 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
314 IP_NF_ASSERT(!timer_pending(&ct->timeout));
316 /* To make sure we don't get any weird locking issues here:
317 * destroy_conntrack() MUST NOT be called with a write lock
318 * to ip_conntrack_lock!!! -HW */
319 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
320 if (proto && proto->destroy)
/* NAT layer (or others) gets a chance to clean up its per-ct state. */
323 if (ip_conntrack_destroyed)
324 ip_conntrack_destroyed(ct);
326 WRITE_LOCK(&ip_conntrack_lock);
327 /* Make sure don't leave any orphaned expectations lying around */
329 remove_expectations(ct, 1);
331 /* Delete our master expectation */
333 if (ct->master->expectant) {
334 /* can't call __unexpect_related here,
335 * since it would screw up expect_list */
336 list_del(&ct->master->expected_list);
/* Remember the master so we can drop its ref outside the lock. */
337 master = ct->master->expectant;
341 WRITE_UNLOCK(&ip_conntrack_lock);
344 ip_conntrack_put(master);
346 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
347 kmem_cache_free(ip_conntrack_cachep, ct);
348 atomic_dec(&ip_conntrack_count);
/* Conntrack timer callback: the connection timed out. Unhash it under the
 * write lock, then drop the hash table's reference (which usually frees
 * it via destroy_conntrack). */
351 static void death_by_timeout(unsigned long ul_conntrack)
353 struct ip_conntrack *ct = (void *)ul_conntrack;
355 WRITE_LOCK(&ip_conntrack_lock);
356 clean_from_lists(ct);
357 WRITE_UNLOCK(&ip_conntrack_lock);
358 ip_conntrack_put(ct);
/* LIST_FIND predicate for hash buckets: exact tuple match, optionally
 * skipping one conntrack (used by tuple_taken/alter_reply to ignore the
 * conntrack being checked itself). */
362 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
363 const struct ip_conntrack_tuple *tuple,
364 const struct ip_conntrack *ignored_conntrack)
366 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
367 return i->ctrack != ignored_conntrack
368 && ip_ct_tuple_equal(tuple, &i->tuple);
/* Unlocked hash lookup: hash the tuple, scan that bucket for an exact
 * match (skipping ignored_conntrack). Caller holds the read lock. */
371 static struct ip_conntrack_tuple_hash *
372 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
373 const struct ip_conntrack *ignored_conntrack)
375 struct ip_conntrack_tuple_hash *h;
376 unsigned int hash = hash_conntrack(tuple);
378 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
379 h = LIST_FIND(&ip_conntrack_hash[hash],
381 struct ip_conntrack_tuple_hash *,
382 tuple, ignored_conntrack);
386 /* Find a connection corresponding to a tuple. */
/* Locked lookup that takes a reference on the found conntrack; the caller
 * must release it with ip_conntrack_put(). */
387 struct ip_conntrack_tuple_hash *
388 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
389 const struct ip_conntrack *ignored_conntrack)
391 struct ip_conntrack_tuple_hash *h;
393 READ_LOCK(&ip_conntrack_lock);
394 h = __ip_conntrack_find(tuple, ignored_conntrack);
/* Pin the conntrack while we still hold the lock. */
396 atomic_inc(&h->ctrack->ct_general.use);
397 READ_UNLOCK(&ip_conntrack_lock);
/* Recover the conntrack and its ctinfo from an nf_ct_info pointer: the
 * info structs live in an array inside the conntrack, so the array index
 * (pointer difference) *is* the ip_conntrack_info value. */
402 static inline struct ip_conntrack *
403 __ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
405 struct ip_conntrack *ct
406 = (struct ip_conntrack *)nfct->master;
408 /* ctinfo is the index of the nfct inside the conntrack */
409 *ctinfo = nfct - ct->infos;
410 IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
414 /* Return conntrack and conntrack_info given skb->nfct->master */
/* Public wrapper over __ip_conntrack_get operating on an skb. */
415 struct ip_conntrack *
416 ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
419 return __ip_conntrack_get(skb->nfct, ctinfo);
423 /* Confirm a connection given skb->nfct; places it in hash table */
/* Inserts an as-yet-unconfirmed conntrack into both hash buckets unless a
 * racing entry (either direction) beat us to it, starts its timeout timer
 * relative to now, takes the hash table's reference, and marks the
 * conntrack CONFIRMED. Only packets in the ORIGINAL direction confirm. */
425 __ip_conntrack_confirm(struct nf_ct_info *nfct)
427 unsigned int hash, repl_hash;
428 struct ip_conntrack *ct;
429 enum ip_conntrack_info ctinfo;
431 ct = __ip_conntrack_get(nfct, &ctinfo);
433 /* ipt_REJECT uses ip_conntrack_attach to attach related
434 ICMP/TCP RST packets in other direction. Actual packet
435 which created connection will be IP_CT_NEW or for an
436 expected connection, IP_CT_RELATED. */
437 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
440 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
441 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
443 /* We're not in hash table, and we refuse to set up related
444 connections for unconfirmed conns. But packet copies and
445 REJECT will give spurious warnings here. */
446 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
448 /* No external references means noone else could have
450 IP_NF_ASSERT(!is_confirmed(ct));
451 DEBUGP("Confirming conntrack %p\n", ct);
453 WRITE_LOCK(&ip_conntrack_lock);
454 /* See if there's one in the list already, including reverse:
455 NAT could have grabbed it without realizing, since we're
456 not in the hash. If there is, we lost race. */
457 if (!LIST_FIND(&ip_conntrack_hash[hash],
459 struct ip_conntrack_tuple_hash *,
460 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
461 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
463 struct ip_conntrack_tuple_hash *,
464 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
465 list_prepend(&ip_conntrack_hash[hash],
466 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
467 list_prepend(&ip_conntrack_hash[repl_hash],
468 &ct->tuplehash[IP_CT_DIR_REPLY]);
469 /* Timer relative to confirmation time, not original
470 setting time, otherwise we'd get timer wrap in
471 weird delay cases. */
472 ct->timeout.expires += jiffies;
473 add_timer(&ct->timeout);
/* The hash table itself now holds a reference. */
474 atomic_inc(&ct->ct_general.use);
475 set_bit(IPS_CONFIRMED_BIT, &ct->status);
476 WRITE_UNLOCK(&ip_conntrack_lock);
/* Lost the race: another CPU inserted a matching entry first. */
480 WRITE_UNLOCK(&ip_conntrack_lock);
484 /* Returns true if a connection correspondings to the tuple (required
/* Pure existence check: like find_get but takes no reference, and skips
 * ignored_conntrack so a conntrack never collides with itself. */
487 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
488 const struct ip_conntrack *ignored_conntrack)
490 struct ip_conntrack_tuple_hash *h;
492 READ_LOCK(&ip_conntrack_lock);
493 h = __ip_conntrack_find(tuple, ignored_conntrack);
494 READ_UNLOCK(&ip_conntrack_lock);
499 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
500 struct ip_conntrack *
501 icmp_error_track(struct sk_buff *skb,
502 enum ip_conntrack_info *ctinfo,
503 unsigned int hooknum)
505 struct ip_conntrack_tuple innertuple, origtuple;
510 struct ip_conntrack_protocol *innerproto;
511 struct ip_conntrack_tuple_hash *h;
514 IP_NF_ASSERT(skb->nfct == NULL);
516 /* Not enough header? */
517 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
520 if (inside.icmp.type != ICMP_DEST_UNREACH
521 && inside.icmp.type != ICMP_SOURCE_QUENCH
522 && inside.icmp.type != ICMP_TIME_EXCEEDED
523 && inside.icmp.type != ICMP_PARAMETERPROB
524 && inside.icmp.type != ICMP_REDIRECT)
527 /* Ignore ICMP's containing fragments (shouldn't happen) */
528 if (inside.ip.frag_off & htons(IP_OFFSET)) {
529 DEBUGP("icmp_error_track: fragment of proto %u\n",
534 innerproto = ip_ct_find_proto(inside.ip.protocol);
535 dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
536 /* Are they talking about one of our connections? */
537 if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
538 DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
542 /* Ordinarily, we'd expect the inverted tupleproto, but it's
543 been preserved inside the ICMP. */
544 if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
545 DEBUGP("icmp_error_track: Can't invert tuple\n");
549 *ctinfo = IP_CT_RELATED;
551 h = ip_conntrack_find_get(&innertuple, NULL);
553 /* Locally generated ICMPs will match inverted if they
554 haven't been SNAT'ed yet */
555 /* FIXME: NAT code has to handle half-done double NAT --RR */
556 if (hooknum == NF_IP_LOCAL_OUT)
557 h = ip_conntrack_find_get(&origtuple, NULL);
560 DEBUGP("icmp_error_track: no match\n");
563 /* Reverse direction from that found */
564 if (DIRECTION(h) != IP_CT_DIR_REPLY)
565 *ctinfo += IP_CT_IS_REPLY;
567 if (DIRECTION(h) == IP_CT_DIR_REPLY)
568 *ctinfo += IP_CT_IS_REPLY;
571 /* Update skb to refer to this connection */
572 skb->nfct = &h->ctrack->infos[*ctinfo];
576 /* There's a small race here where we may free a just-assured
577 connection. Too bad: we're in trouble anyway. */
/* early_drop predicate: eviction candidates are conntracks that have not
 * yet been marked ASSURED by their protocol handler. */
578 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
580 return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
/* Table-full pressure valve: pick an unassured conntrack from `chain`
 * (searched backwards — roughly the oldest) and kill it as if it had
 * timed out. Returns whether a victim was dropped. */
583 static int early_drop(struct list_head *chain)
585 /* Traverse backwards: gives us oldest, which is roughly LRU */
586 struct ip_conntrack_tuple_hash *h;
589 READ_LOCK(&ip_conntrack_lock);
590 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
/* Pin the victim before releasing the lock. */
592 atomic_inc(&h->ctrack->ct_general.use);
593 READ_UNLOCK(&ip_conntrack_lock);
/* Only the CPU that wins del_timer gets to reap it. */
598 if (del_timer(&h->ctrack->timeout)) {
599 death_by_timeout((unsigned long)h->ctrack);
602 ip_conntrack_put(h->ctrack);
/* LIST_FIND predicate: does the reply tuple fall within this helper's
 * registered tuple/mask? */
606 static inline int helper_cmp(const struct ip_conntrack_helper *i,
607 const struct ip_conntrack_tuple *rtuple)
609 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
/* Find the registered helper (ftp, irc, ...) matching a tuple, or NULL. */
612 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
614 return LIST_FIND(&helpers, helper_cmp,
615 struct ip_conntrack_helper *,
619 /* Allocate a new conntrack: we return -ENOMEM if classification
620 failed due to stress. Otherwise it really is unclassifiable. */
/* Build a fresh (unconfirmed) conntrack for `tuple`:
 *   - lazily seed the hash random;
 *   - if the table is at ip_conntrack_max, try early_drop on a rotating
 *     chain and on the target chain before giving up with -ENOMEM;
 *   - allocate from the slab cache, initialize both direction tuples,
 *     the per-info master back-pointers, and the (not yet armed) timer;
 *   - let the protocol handler vet the first packet (protocol->new);
 *   - under the write lock, check whether a *confirmed* master was
 *     expecting this connection; if so adopt the expectation (EXPECTED
 *     bit, master link, removal from the global expect list, reference
 *     on the master), otherwise attach a helper by reply tuple;
 *   - finally run the expectation's expectfn (e.g. NAT setup) outside
 *     the lock.
 * Returns the ORIGINAL-direction tuple hash, or ERR_PTR(-ENOMEM). */
621 static struct ip_conntrack_tuple_hash *
622 init_conntrack(const struct ip_conntrack_tuple *tuple,
623 struct ip_conntrack_protocol *protocol,
626 struct ip_conntrack *conntrack;
627 struct ip_conntrack_tuple repl_tuple;
629 struct ip_conntrack_expect *expected;
631 static unsigned int drop_next;
633 if (!ip_conntrack_hash_rnd_initted) {
634 get_random_bytes(&ip_conntrack_hash_rnd, 4);
635 ip_conntrack_hash_rnd_initted = 1;
638 hash = hash_conntrack(tuple);
640 if (ip_conntrack_max &&
641 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
642 /* Try dropping from random chain, or else from the
643 chain about to put into (in case they're trying to
644 bomb one hash chain). */
645 unsigned int next = (drop_next++)%ip_conntrack_htable_size;
647 if (!early_drop(&ip_conntrack_hash[next])
648 && !early_drop(&ip_conntrack_hash[hash])) {
651 "ip_conntrack: table full, dropping"
653 return ERR_PTR(-ENOMEM);
657 if (!invert_tuple(&repl_tuple, tuple, protocol)) {
658 DEBUGP("Can't invert tuple.\n");
662 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
664 DEBUGP("Can't allocate conntrack.\n");
665 return ERR_PTR(-ENOMEM);
668 memset(conntrack, 0, sizeof(*conntrack));
669 atomic_set(&conntrack->ct_general.use, 1);
670 conntrack->ct_general.destroy = destroy_conntrack;
671 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
672 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
673 conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
674 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
675 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
676 conntrack->xid[IP_CT_DIR_REPLY] = -1;
/* Every nf_ct_info slot points back at this conntrack (see
 * __ip_conntrack_get, which recovers ctinfo from the slot index). */
677 for (i=0; i < IP_CT_NUMBER; i++)
678 conntrack->infos[i].master = &conntrack->ct_general;
680 if (!protocol->new(conntrack, skb)) {
681 kmem_cache_free(ip_conntrack_cachep, conntrack);
684 /* Don't set timer yet: wait for confirmation */
685 init_timer(&conntrack->timeout);
686 conntrack->timeout.data = (unsigned long)conntrack;
687 conntrack->timeout.function = death_by_timeout;
689 INIT_LIST_HEAD(&conntrack->sibling_list);
691 WRITE_LOCK(&ip_conntrack_lock);
692 /* Need finding and deleting of expected ONLY if we win race */
693 READ_LOCK(&ip_conntrack_expect_tuple_lock);
694 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
695 struct ip_conntrack_expect *, tuple);
696 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
698 /* If master is not in hash table yet (ie. packet hasn't left
699 this machine yet), how can other end know about expected?
700 Hence these are not the droids you are looking for (if
701 master ct never got confirmed, we'd hold a reference to it
702 and weird things would happen to future packets). */
703 if (expected && !is_confirmed(expected->expectant))
706 /* Look up the conntrack helper for master connections only */
708 conntrack->helper = ip_ct_find_helper(&repl_tuple);
710 /* If the expectation is dying, then this is a loser. */
712 && expected->expectant->helper->timeout
713 && ! del_timer(&expected->timeout))
717 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
718 conntrack, expected);
719 /* Welcome, Mr. Bond. We've been expecting you... */
720 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
721 conntrack->master = expected;
722 expected->sibling = conntrack;
723 LIST_DELETE(&ip_conntrack_expect_list, expected);
724 expected->expectant->expecting--;
/* Take a reference on the master on behalf of this child. */
725 nf_conntrack_get(&master_ct(conntrack)->infos[0]);
727 atomic_inc(&ip_conntrack_count);
728 WRITE_UNLOCK(&ip_conntrack_lock);
/* Run NAT/helper setup for the expected connection outside the lock. */
730 if (expected && expected->expectfn)
731 expected->expectfn(conntrack);
732 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
735 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/* Classify a non-ICMP-error packet: extract its tuple, find (or create
 * via init_conntrack) the matching connection, derive ctinfo from the
 * direction and the SEEN_REPLY/EXPECTED status bits, and attach the
 * conntrack to skb->nfct.
 * NOTE(review): the excerpt drops a parameter line here — upstream this
 * also takes an `int *set_reply` out-parameter; verify against the full
 * file. */
736 static inline struct ip_conntrack *
737 resolve_normal_ct(struct sk_buff *skb,
738 struct ip_conntrack_protocol *proto,
740 unsigned int hooknum,
741 enum ip_conntrack_info *ctinfo)
743 struct ip_conntrack_tuple tuple;
744 struct ip_conntrack_tuple_hash *h;
746 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
748 if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
751 /* look for tuple match */
752 h = ip_conntrack_find_get(&tuple, NULL);
/* Miss: create a new conntrack (may return ERR_PTR under pressure). */
754 h = init_conntrack(&tuple, proto, skb);
761 /* It exists; we have (non-exclusive) reference. */
762 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
763 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
764 /* Please set reply bit if this packet OK */
767 /* Once we've had two way comms, always ESTABLISHED. */
768 if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
769 DEBUGP("ip_conntrack_in: normal packet for %p\n",
771 *ctinfo = IP_CT_ESTABLISHED;
772 } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
773 DEBUGP("ip_conntrack_in: related packet for %p\n",
775 *ctinfo = IP_CT_RELATED;
777 DEBUGP("ip_conntrack_in: new packet for %p\n",
/* Hand the reference we hold to the skb via the matching info slot. */
783 skb->nfct = &h->ctrack->infos[*ctinfo];
787 /* Netfilter hook itself. */
/* Main conntrack netfilter hook (PRE_ROUTING / LOCAL_OUT): rejects
 * non-head fragments, skips broadcasts and already-tracked packets,
 * classifies the packet (ICMP errors specially), then runs the protocol
 * handler's per-packet state machine and, if present, the connection's
 * helper. A negative/invalid verdict from either detaches the conntrack
 * from the skb. Finally records that a reply was seen when applicable. */
788 unsigned int ip_conntrack_in(unsigned int hooknum,
789 struct sk_buff **pskb,
790 const struct net_device *in,
791 const struct net_device *out,
792 int (*okfn)(struct sk_buff *))
794 struct ip_conntrack *ct;
795 enum ip_conntrack_info ctinfo;
796 struct ip_conntrack_protocol *proto;
/* Fragments must have been reassembled before this hook runs. */
801 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
802 if (net_ratelimit()) {
803 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
804 (*pskb)->nh.iph->protocol, hooknum);
809 /* FIXME: Do this right please. --RR */
810 (*pskb)->nfcache |= NFC_UNKNOWN;
812 /* Doesn't cover locally-generated broadcast, so not worth it. */
814 /* Ignore broadcast: no `connection'. */
815 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
816 printk("Broadcast packet!\n");
818 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
819 == htonl(0x000000FF)) {
820 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
821 NIPQUAD((*pskb)->nh.iph->saddr),
822 NIPQUAD((*pskb)->nh.iph->daddr),
823 (*pskb)->sk, (*pskb)->pkt_type);
827 /* Previously seen (loopback or untracked)? Ignore. */
831 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
833 /* It may be an icmp error... */
834 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
835 && icmp_error_track(*pskb, &ctinfo, hooknum))
838 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
839 /* Not valid part of a connection */
843 /* Too stressed to deal. */
846 IP_NF_ASSERT((*pskb)->nfct);
/* L4 state machine verdict for this packet. */
848 ret = proto->packet(ct, *pskb, ctinfo);
/* Invalid per protocol handler: detach conntrack from the skb. */
851 nf_conntrack_put((*pskb)->nfct);
852 (*pskb)->nfct = NULL;
856 if (ret != NF_DROP && ct->helper) {
857 ret = ct->helper->help(*pskb, ct, ctinfo);
/* Helper rejected the packet: detach as well. */
860 nf_conntrack_put((*pskb)->nfct);
861 (*pskb)->nfct = NULL;
/* First packet seen in the reply direction: remember it. */
866 set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
/* Convenience wrapper: invert a tuple, resolving the protocol handler
 * from the tuple's own protocol number. */
871 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
872 const struct ip_conntrack_tuple *orig)
874 return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
/* LIST_FIND predicate: is `i` a re-sent version of the same expectation?
 * If the expectation was never NAT-rewritten (ct_tuple.dst.protonum == 0)
 * compare against its live tuple, otherwise against the saved original
 * ct_tuple; the masks must match exactly in either case. */
877 static inline int resent_expect(const struct ip_conntrack_expect *i,
878 const struct ip_conntrack_tuple *tuple,
879 const struct ip_conntrack_tuple *mask)
881 DEBUGP("resent_expect\n");
882 DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
883 DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
884 DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
885 return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
886 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
887 && ip_ct_tuple_equal(&i->mask, mask));
890 /* Would two expected things clash? */
/* LIST_FIND predicate: would an expectation (tuple, mask) overlap
 * expectation `i`? Two expectations clash when their tuples agree on the
 * intersection of the two masks. */
891 static inline int expect_clash(const struct ip_conntrack_expect *i,
892 const struct ip_conntrack_tuple *tuple,
893 const struct ip_conntrack_tuple *mask)
895 /* Part covered by intersection of masks must be unequal,
896 otherwise they clash */
897 struct ip_conntrack_tuple intersect_mask
898 = { { i->mask.src.ip & mask->src.ip,
899 { i->mask.src.u.all & mask->src.u.all } },
900 { i->mask.dst.ip & mask->dst.ip,
901 { i->mask.dst.u.all & mask->dst.u.all },
902 i->mask.dst.protonum & mask->dst.protonum } };
904 return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
/* Public locked wrapper around unexpect_related. */
907 inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
909 WRITE_LOCK(&ip_conntrack_lock);
910 unexpect_related(expect);
911 WRITE_UNLOCK(&ip_conntrack_lock);
/* Expectation timer callback: the expected connection never arrived;
 * remove the expectation (timer is already expired, so the non-timer
 * __unexpect_related variant is correct here). */
914 static void expectation_timed_out(unsigned long ul_expect)
916 struct ip_conntrack_expect *expect = (void *) ul_expect;
918 DEBUGP("expectation %p timed out\n", expect);
919 WRITE_LOCK(&ip_conntrack_lock);
920 __unexpect_related(expect);
921 WRITE_UNLOCK(&ip_conntrack_lock);
/* Allocate a zeroed expectation (GFP_ATOMIC — callable from packet
 * context). Zeroing matters: tuple comparisons read the whole union. */
924 struct ip_conntrack_expect *
925 ip_conntrack_expect_alloc(void)
927 struct ip_conntrack_expect *new;
929 new = (struct ip_conntrack_expect *)
930 kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
932 DEBUGP("expect_related: OOM allocating expect\n");
936 /* tuple_cmp compares whole union, we have to initialized cleanly */
937 memset(new, 0, sizeof(struct ip_conntrack_expect));
/* Link a freshly built expectation to its master conntrack: add it to the
 * master's sibling list and the global expectation list, arm its timeout
 * timer if the helper specifies one, and bump the master's pending count.
 * Caller holds the write lock. */
943 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
944 struct ip_conntrack *related_to)
946 DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
947 new->expectant = related_to;
/* The lists' reference. */
949 atomic_set(&new->use, 1);
951 /* add to expected list for this connection */
952 list_add_tail(&new->expected_list, &related_to->sibling_list);
953 /* add to global list of expectations */
954 list_prepend(&ip_conntrack_expect_list, &new->list);
955 /* add and start timer if required */
956 if (related_to->helper->timeout) {
957 init_timer(&new->timeout);
958 new->timeout.data = (unsigned long)new;
959 new->timeout.function = expectation_timed_out;
960 new->timeout.expires = jiffies +
961 related_to->helper->timeout * HZ;
962 add_timer(&new->timeout);
964 related_to->expecting++;
967 /* Add a related connection. */
/* Register `expect` as a connection expected by `related_to`. Handles
 * three special cases before inserting:
 *   - a re-sent expectation (same tuple/mask): just refresh its timer;
 *   - master already at helper->max_expected: either refuse, or (with
 *     IP_CT_HELPER_F_REUSE_EXPECT) evict the oldest unconfirmed sibling;
 *   - a clash with a different existing expectation: refuse as busy. */
968 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
969 struct ip_conntrack *related_to)
971 struct ip_conntrack_expect *old;
974 WRITE_LOCK(&ip_conntrack_lock);
975 /* Because of the write lock, no reader can walk the lists,
976 * so there is no need to use the tuple lock too */
978 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
979 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
980 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
982 old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
983 struct ip_conntrack_expect *, &expect->tuple,
986 /* Helper private data may contain offsets but no pointers
987 pointing into the payload - otherwise we should have to copy
988 the data filled out by the helper over the old one */
989 DEBUGP("expect_related: resent packet\n");
990 if (related_to->helper->timeout) {
991 if (!del_timer(&old->timeout)) {
992 /* expectation is dying. Fall through */
995 old->timeout.expires = jiffies +
996 related_to->helper->timeout * HZ;
997 add_timer(&old->timeout);
1001 WRITE_UNLOCK(&ip_conntrack_lock);
1005 } else if (related_to->helper->max_expected &&
1006 related_to->expecting >= related_to->helper->max_expected) {
/* At the per-master cap: refuse unless the helper opts into reuse. */
1008 if (!(related_to->helper->flags &
1009 IP_CT_HELPER_F_REUSE_EXPECT)) {
1010 WRITE_UNLOCK(&ip_conntrack_lock);
1011 if (net_ratelimit())
1013 "ip_conntrack: max number of expected "
1014 "connections %i of %s reached for "
1015 "%u.%u.%u.%u->%u.%u.%u.%u\n",
1016 related_to->helper->max_expected,
1017 related_to->helper->name,
1018 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1019 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1023 DEBUGP("ip_conntrack: max number of expected "
1024 "connections %i of %s reached for "
1025 "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1026 related_to->helper->max_expected,
1027 related_to->helper->name,
1028 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1029 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1031 /* choose the the oldest expectation to evict */
1032 list_for_each_entry(old, &related_to->sibling_list,
1034 if (old->sibling == NULL)
1037 /* We cannot fail since related_to->expecting is the number
1038 * of unconfirmed expectations */
1039 IP_NF_ASSERT(old && old->sibling == NULL);
1041 /* newnat14 does not reuse the real allocated memory
1042 * structures but rather unexpects the old and
1043 * allocates a new. unexpect_related will decrement
1044 * related_to->expecting.
1046 unexpect_related(old);
1048 } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1049 struct ip_conntrack_expect *, &expect->tuple,
1051 WRITE_UNLOCK(&ip_conntrack_lock);
1052 DEBUGP("expect_related: busy!\n");
1058 out: ip_conntrack_expect_insert(expect, related_to);
1060 WRITE_UNLOCK(&ip_conntrack_lock);
1065 /* Change tuple in an existing expectation */
/* NAT support: rewrite the tuple of a live expectation in place (under the
 * expect-tuple write lock, with ip_conntrack_lock at least read-held).
 * First rewrite saves the original into ct_tuple (marking it "seen", used
 * by resent_expect); a clashing new tuple makes NAT pick another one. */
1066 int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1067 struct ip_conntrack_tuple *newtuple)
1071 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1072 WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1074 DEBUGP("change_expect:\n");
1075 DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
1076 DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
1077 DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
1078 if (expect->ct_tuple.dst.protonum == 0) {
1079 /* Never seen before */
1080 DEBUGP("change expect: never seen before\n");
1081 if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
1082 && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1083 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1084 /* Force NAT to find an unused tuple */
/* Save the pre-NAT tuple, then install the new one. */
1087 memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1088 memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
1093 DEBUGP("change expect: resent packet\n");
1094 if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1097 /* Force NAT to choose again the same port */
1101 WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1106 /* Alter reply tuple (maybe alter helper). If it's already taken,
1107 return 0 and don't do alteration. */
/* Used by NAT on unconfirmed conntracks: install a new reply tuple unless
 * another connection already owns it, and (for master connections with no
 * expectations) re-resolve the helper for the new reply tuple. */
1108 int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1109 const struct ip_conntrack_tuple *newreply)
1111 WRITE_LOCK(&ip_conntrack_lock);
1112 if (__ip_conntrack_find(newreply, conntrack)) {
1113 WRITE_UNLOCK(&ip_conntrack_lock);
1116 /* Should be unconfirmed, so not in hash table yet */
1117 IP_NF_ASSERT(!is_confirmed(conntrack));
1119 DEBUGP("Altering reply tuple of %p to ", conntrack);
1120 DUMP_TUPLE(newreply);
1122 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1123 if (!conntrack->master && list_empty(&conntrack->sibling_list))
1124 conntrack->helper = ip_ct_find_helper(newreply);
1125 WRITE_UNLOCK(&ip_conntrack_lock);
/* Register a conntrack helper: prepend to the global helpers list so the
 * newest registration wins ip_ct_find_helper lookups. */
1130 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1132 WRITE_LOCK(&ip_conntrack_lock);
1133 list_prepend(&helpers, me);
1134 WRITE_UNLOCK(&ip_conntrack_lock);
/* Per-bucket-entry callback for helper unregistration: for conntracks
 * bound to helper `me`, drop their pending expectations, then clear the
 * helper pointer (in that order, so no new expectation sneaks in). */
1139 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1140 const struct ip_conntrack_helper *me)
1142 if (i->ctrack->helper == me) {
1143 /* Get rid of any expected. */
1144 remove_expectations(i->ctrack, 0);
1145 /* And *then* set helper to NULL */
1146 i->ctrack->helper = NULL;
/* Unregister a helper: unlink it from the global list, then walk the
 * entire hash table detaching it (via unhelp()) from every conntrack
 * still using it. */
1151 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1155 /* Need write lock here, to delete helper. */
1156 WRITE_LOCK(&ip_conntrack_lock);
1157 LIST_DELETE(&helpers, me);
1159 /* Get rid of expecteds, set helpers to NULL. */
1160 for (i = 0; i < ip_conntrack_htable_size; i++)
1161 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1162 struct ip_conntrack_tuple_hash *, me);
1163 WRITE_UNLOCK(&ip_conntrack_lock);
1165 /* Someone could be still looking at the helper in a bh. */
1169 /* Refresh conntrack for this many jiffies. */
1170 void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
/* Sanity: the timer must still point at this conntrack. */
1172 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1174 /* If not in hash table, timer will not be active yet */
1175 if (!is_confirmed(ct))
/* Store the raw interval only; presumably converted to an absolute
 * expiry when the conntrack is confirmed — not visible in this view. */
1176 ct->timeout.expires = extra_jiffies;
1178 WRITE_LOCK(&ip_conntrack_lock);
1179 /* Need del_timer for race avoidance (may already be dying). */
1180 if (del_timer(&ct->timeout)) {
1181 ct->timeout.expires = jiffies + extra_jiffies;
1182 add_timer(&ct->timeout);
1184 WRITE_UNLOCK(&ip_conntrack_lock);
1188 /* Returns new sk_buff, or NULL */
1190 ip_ct_gather_frags(struct sk_buff *skb)
/* Remember the owning socket (and debug bits) across ip_defrag(),
 * which may hand back a different sk_buff. */
1192 struct sock *sk = skb->sk;
1193 #ifdef CONFIG_NETFILTER_DEBUG
1194 unsigned int olddebug = skb->nf_debug;
/* Reassemble fragments; NULL/error handling is in lines elided from
 * this sampled view. */
1202 skb = ip_defrag(skb);
1212 skb_set_owner_w(skb, sk);
/* Defrag may have rewritten the IP header: recompute its checksum and
 * flag the skb as altered for the netfilter cache. */
1216 ip_send_check(skb->nh.iph);
1217 skb->nfcache |= NFC_ALTERED;
1218 #ifdef CONFIG_NETFILTER_DEBUG
1219 /* Packet path as if nothing had happened. */
1220 skb->nf_debug = olddebug;
1225 /* Used by ipt_REJECT. */
/* Attach the conntrack referenced by 'nfct' to the new skb 'nskb'
 * (e.g. a generated reject packet) as a RELATED association, taking a
 * reference on the conntrack. */
1226 static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1228 struct ip_conntrack *ct;
1229 enum ip_conntrack_info ctinfo;
1231 ct = __ip_conntrack_get(nfct, &ctinfo);
1233 /* This ICMP is in reverse direction to the packet which
/* Original-direction trigger => the reject travels as a reply. */
1235 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1236 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1238 ctinfo = IP_CT_RELATED;
1240 /* Attach new skbuff, and increment count */
1241 nskb->nfct = &ct->infos[ctinfo];
1242 atomic_inc(&ct->ct_general.use);
/* Adapter for LIST_FIND: apply the caller-supplied 'kill' predicate
 * to the conntrack behind one hash entry. */
1246 do_kill(const struct ip_conntrack_tuple_hash *i,
1247 int (*kill)(const struct ip_conntrack *i, void *data),
1250 return kill(i->ctrack, data);
1253 /* Bring out ya dead! */
/* Scan the hash table from *bucket onward for the first conntrack the
 * 'kill' predicate accepts. *bucket is advanced so the caller can
 * resume; a reference is taken on the match before unlocking. */
1254 static struct ip_conntrack_tuple_hash *
1255 get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1256 void *data, unsigned int *bucket)
1258 struct ip_conntrack_tuple_hash *h = NULL;
1260 READ_LOCK(&ip_conntrack_lock);
1261 for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
1262 h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
1263 struct ip_conntrack_tuple_hash *, kill, data);
/* Pin the match while still holding the read lock. */
1266 atomic_inc(&h->ctrack->ct_general.use);
1267 READ_UNLOCK(&ip_conntrack_lock);
/* Kill every conntrack the 'kill' predicate selects, by firing its
 * timeout destructor early (or leaving it to the pending timer). */
1273 ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1276 struct ip_conntrack_tuple_hash *h;
1277 unsigned int bucket = 0;
1279 while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
1280 /* Time to push up daises... */
/* Only the side that wins del_timer() runs the destructor. */
1281 if (del_timer(&h->ctrack->timeout))
1282 death_by_timeout((unsigned long)h->ctrack);
1283 /* ... else the timer will get him soon. */
/* Drop the reference taken by get_next_corpse(). */
1285 ip_conntrack_put(h->ctrack);
1289 /* Fast function for those who don't want to parse /proc (and I don't
1291 /* Reversing the socket's dst/src point of view gives us the reply
/* SO_ORIGINAL_DST getsockopt handler: look up the conntrack for this
 * TCP socket's reply tuple and return the pre-NAT (original)
 * destination as a sockaddr_in. */
1294 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1296 struct inet_opt *inet = inet_sk(sk);
1297 struct ip_conntrack_tuple_hash *h;
1298 struct ip_conntrack_tuple tuple;
/* Build the reply-direction tuple straight from the socket. */
1300 IP_CT_TUPLE_U_BLANK(&tuple);
1301 tuple.src.ip = inet->rcv_saddr;
1302 tuple.src.u.tcp.port = inet->sport;
1303 tuple.dst.ip = inet->daddr;
1304 tuple.dst.u.tcp.port = inet->dport;
1305 tuple.dst.protonum = IPPROTO_TCP;
1307 /* We only do TCP at the moment: is there a better way? */
1308 if (strcmp(sk->sk_prot->name, "TCP")) {
1309 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1310 return -ENOPROTOOPT;
/* Userspace buffer must be big enough for a sockaddr_in. */
1313 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
/* NOTE(review): '%u' paired with a size_t argument — debug-only code,
 * but the portable specifier would be %Zu here. */
1314 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1315 *len, sizeof(struct sockaddr_in));
1319 h = ip_conntrack_find_get(&tuple, NULL);
1321 struct sockaddr_in sin;
/* NOTE(review): sin.sin_zero is not cleared before copy_to_user();
 * unless a memset lives in lines elided from this sampled view, this
 * copies uninitialized kernel stack bytes to userspace — confirm. */
1323 sin.sin_family = AF_INET;
/* ORIGINAL-direction dst = the destination before NAT rewrote it. */
1324 sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1325 .tuple.dst.u.tcp.port;
1326 sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1329 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1330 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
/* Drop the reference ip_conntrack_find_get() took. */
1331 ip_conntrack_put(h->ctrack);
1332 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1337 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1338 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1339 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
/* getsockopt() registration exposing SO_ORIGINAL_DST; the handler and
 * remaining fields are in lines elided from this sampled view. */
1343 static struct nf_sockopt_ops so_getorigdst = {
1345 .get_optmin = SO_ORIGINAL_DST,
1346 .get_optmax = SO_ORIGINAL_DST+1,
/* Predicate for ip_ct_selective_cleanup(): match every conntrack
 * (body elided from this sampled view). */
1350 static int kill_all(const struct ip_conntrack *i, void *data)
1355 /* Mishearing the voices in his head, our hero wonders how he's
1356 supposed to kill the mall. */
/* Module-exit teardown: detach the REJECT attach hook, flush every
 * conntrack (retrying until none remain), then free the slab cache,
 * the hash table and the sockopt registration. */
1357 void ip_conntrack_cleanup(void)
1359 ip_ct_attach = NULL;
1360 /* This makes sure all current packets have passed through
1361 netfilter framework. Roll on, two-stage module
/* Kill everything currently tracked ... */
1366 ip_ct_selective_cleanup(kill_all, NULL);
/* ... and retry (label is in lines elided here) while references or
 * re-created entries keep the count non-zero. */
1367 if (atomic_read(&ip_conntrack_count) != 0) {
1369 goto i_see_dead_people;
1372 kmem_cache_destroy(ip_conntrack_cachep);
1373 vfree(ip_conntrack_hash);
1374 nf_unregister_sockopt(&so_getorigdst);
/* Optional module parameter: overrides the memory-based auto-sizing
 * of the conntrack hash table in ip_conntrack_init(). */
1377 static int hashsize;
1378 MODULE_PARM(hashsize, "i");
/* Module/boot-time initialization: size and allocate the hash table,
 * create the conntrack slab cache, register built-in protocol
 * trackers and the SO_ORIGINAL_DST sockopt, and prime the special
 * "untracked" conntrack. */
1380 int __init ip_conntrack_init(void)
1385 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1386 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
/* An explicit 'hashsize' module parameter wins over the heuristic. */
1388 ip_conntrack_htable_size = hashsize;
1390 ip_conntrack_htable_size
1391 = (((num_physpages << PAGE_SHIFT) / 16384)
1392 / sizeof(struct list_head));
1393 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1394 ip_conntrack_htable_size = 8192;
1395 if (ip_conntrack_htable_size < 16)
1396 ip_conntrack_htable_size = 16;
/* Cap the table at an average chain depth of 8 per bucket. */
1398 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1400 printk("ip_conntrack version %s (%u buckets, %d max)"
1401 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1402 ip_conntrack_htable_size, ip_conntrack_max,
1403 sizeof(struct ip_conntrack));
1405 ret = nf_register_sockopt(&so_getorigdst);
1407 printk(KERN_ERR "Unable to register netfilter socket option\n");
1411 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1412 * ip_conntrack_htable_size);
1413 if (!ip_conntrack_hash) {
1414 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1415 goto err_unreg_sockopt;
1418 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1419 sizeof(struct ip_conntrack), 0,
1420 SLAB_HWCACHE_ALIGN, NULL, NULL);
1421 if (!ip_conntrack_cachep) {
1422 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1425 /* Don't NEED lock here, but good form anyway. */
1426 WRITE_LOCK(&ip_conntrack_lock);
1427 /* Sew in builtin protocols. */
1428 list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1429 list_append(&protocol_list, &ip_conntrack_protocol_udp);
1430 list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1431 WRITE_UNLOCK(&ip_conntrack_lock);
1433 for (i = 0; i < ip_conntrack_htable_size; i++)
1434 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1436 /* For use by ipt_REJECT */
1437 ip_ct_attach = ip_conntrack_attach;
1439 /* Set up fake conntrack:
1440 - to never be deleted, not in any hashes */
1441 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1442 /* - and look it like as a confirmed connection */
1443 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1444 /* - and prepare the ctinfo field for REJECT & NAT. */
1445 ip_conntrack_untracked.infos[IP_CT_NEW].master =
1446 ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1447 ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1448 &ip_conntrack_untracked.ct_general;
/* Error unwind tail (the labels themselves are in lines elided from
 * this sampled view): release the hash, then the sockopt. */
1453 vfree(ip_conntrack_hash);
1455 nf_unregister_sockopt(&so_getorigdst);