/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
/* For ERR_PTR().  Yeah, I know... --RR */
#include <linux/err.h>
/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers. */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION	"2.1"
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DECLARE_RWLOCK(ip_conntrack_lock);
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
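/* Fake conntrack entry for untracked connections; initialized in
   ip_conntrack_init() and never placed in the hash table. */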
struct ip_conntrack ip_conntrack_untracked;
extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
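/* Protocol lookup falls back to the generic protocol when nothing is
   registered for a protocol number. */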
static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
			      u_int8_t protocol)
{
	return protocol == curr->proto;
}
struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	p = LIST_FIND(&protocol_list, proto_cmpfn,
		      struct ip_conntrack_protocol *, protocol);
	if (!p)
		p = &ip_conntrack_generic_protocol;

	return p;
}
struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	READ_LOCK(&ip_conntrack_lock);
	p = __ip_ct_find_proto(protocol);
	READ_UNLOCK(&ip_conntrack_lock);
	return p;
}
inline void
ip_conntrack_put(struct ip_conntrack *ct)
{
	IP_NF_ASSERT(ct);
	IP_NF_ASSERT(ct->infos[0].master);
	/* nf_conntrack_put wants to go via an info struct, so feed it
	   one at random. */
	nf_conntrack_put(&ct->infos[0]);
}
static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
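/* Hash a tuple into one of ip_conntrack_htable_size buckets: jhash
   mixes addresses, ports and protocol with a boot-time random seed so
   remote hosts cannot deliberately flood a single hash chain. */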
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	return (jhash_3words(tuple->src.ip,
			     (tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
}
static int
get_tuple(const struct iphdr *iph,
	  const struct sk_buff *skb,
	  unsigned int dataoff,
	  struct ip_conntrack_tuple *tuple,
	  const struct ip_conntrack_protocol *protocol)
{
	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
static int
invert_tuple(struct ip_conntrack_tuple *inverse,
	     const struct ip_conntrack_tuple *orig,
	     const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;

	return protocol->invert_tuple(inverse, orig);
}
/* ip_conntrack_expect helper functions */

/* Compare tuple parts depending on mask. */
static inline int expect_cmp(const struct ip_conntrack_expect *i,
			     const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}
static void
destroy_expect(struct ip_conntrack_expect *exp)
{
	DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
	IP_NF_ASSERT(atomic_read(&exp->use));
	IP_NF_ASSERT(!timer_pending(&exp->timeout));

	kfree(exp);
}
inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	IP_NF_ASSERT(exp);

	if (atomic_dec_and_test(&exp->use)) {
		/* usage count dropped to zero */
		destroy_expect(exp);
	}
}
static inline struct ip_conntrack_expect *
__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
			 struct ip_conntrack_expect *, tuple);
}
/* Find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *exp;

	READ_LOCK(&ip_conntrack_lock);
	READ_LOCK(&ip_conntrack_expect_tuple_lock);
	exp = __ip_ct_expect_find(tuple);
	if (exp)
		atomic_inc(&exp->use);
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
	READ_UNLOCK(&ip_conntrack_lock);

	return exp;
}
/* remove one specific expectation from all lists and drop refcount,
 * does _NOT_ delete the timer. */
static void __unexpect_related(struct ip_conntrack_expect *expect)
{
	DEBUGP("unexpect_related(%p)\n", expect);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

	/* we're not allowed to unexpect a confirmed expectation! */
	IP_NF_ASSERT(!expect->sibling);

	/* delete from global and local lists */
	list_del(&expect->list);
	list_del(&expect->expected_list);

	/* decrement expect-count of master conntrack */
	if (expect->expectant)
		expect->expectant->expecting--;

	ip_conntrack_expect_put(expect);
}
/* remove one specific expectation from all lists, drop refcount
 * and expire timer.
 * This function can _NOT_ be called for confirmed expects! */
static void unexpect_related(struct ip_conntrack_expect *expect)
{
	IP_NF_ASSERT(expect->expectant);
	IP_NF_ASSERT(expect->expectant->helper);
	/* if we are supposed to have a timer, but we can't delete
	 * it: race condition.  __unexpect_related will
	 * be called by the timeout function */
	if (expect->expectant->helper->timeout
	    && !del_timer(&expect->timeout))
		return;

	__unexpect_related(expect);
}
/* delete all unconfirmed expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
{
	struct list_head *exp_entry, *next;
	struct ip_conntrack_expect *exp;

	DEBUGP("remove_expectations(%p)\n", ct);

	list_for_each_safe(exp_entry, next, &ct->sibling_list) {
		exp = list_entry(exp_entry, struct ip_conntrack_expect,
				 expected_list);

		/* we skip established expectations, as we want to delete
		 * the un-established ones only */
		if (exp->sibling) {
			DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
			if (drop_refcount) {
				/* Indicate that this expectation's parent is dead */
				ip_conntrack_put(exp->expectant);
				exp->expectant = NULL;
			}
			continue;
		}

		IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
		IP_NF_ASSERT(exp->expectant == ct);

		/* delete expectation from global and private lists */
		unexpect_related(exp);
	}
}
static void
clean_from_lists(struct ip_conntrack *ct)
{
	unsigned int ho, hr;

	DEBUGP("clean_from_lists(%p)\n", ct);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all un-established, pending expectations */
	remove_expectations(ct, 1);
}
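/* Final destructor: installed as ct_general.destroy in init_conntrack()
   and invoked once the conntrack's reference count drops to zero. */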
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* Make sure we don't leave any orphaned expectations lying around */
	if (ct->expecting)
		remove_expectations(ct, 1);

	/* Delete our master expectation */
	if (ct->master) {
		if (ct->master->expectant) {
			/* can't call __unexpect_related here,
			 * since it would screw up expect_list */
			list_del(&ct->master->expected_list);
			master = ct->master->expectant;
		}
		kfree(ct->master);
	}
	WRITE_UNLOCK(&ip_conntrack_lock);

	if (master)
		ip_conntrack_put(master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	kmem_cache_free(ip_conntrack_cachep, ct);
	atomic_dec(&ip_conntrack_count);
}
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	WRITE_LOCK(&ip_conntrack_lock);
	clean_from_lists(ct);
	WRITE_UNLOCK(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}
static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
		    const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	return i->ctrack != ignored_conntrack
		&& ip_ct_tuple_equal(tuple, &i->tuple);
}
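/* Look up a tuple in the hash table.  Caller must hold a read lock on
   ip_conntrack_lock; no reference is taken on the result. */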
static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	h = LIST_FIND(&ip_conntrack_hash[hash],
		      conntrack_tuple_cmp,
		      struct ip_conntrack_tuple_hash *,
		      tuple, ignored_conntrack);
	return h;
}
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	READ_LOCK(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}
static inline struct ip_conntrack *
__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack *ct
		= (struct ip_conntrack *)nfct->master;

	/* ctinfo is the index of the nfct inside the conntrack */
	*ctinfo = nfct - ct->infos;
	IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
	return ct;
}
/* Return conntrack and conntrack_info given skb->nfct->master */
struct ip_conntrack *
ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
	if (skb->nfct)
		return __ip_conntrack_get(skb->nfct, ctinfo);
	return NULL;
}
/* Confirm a connection given skb->nfct; places it in hash table */
int
__ip_conntrack_confirm(struct nf_ct_info *nfct)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = __ip_conntrack_get(nfct, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&ip_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct ip_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct ip_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		list_prepend(&ip_conntrack_hash[hash],
			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
		list_prepend(&ip_conntrack_hash[repl_hash],
			     &ct->tuplehash[IP_CT_DIR_REPLY]);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		WRITE_UNLOCK(&ip_conntrack_lock);
		return NF_ACCEPT;
	}

	WRITE_UNLOCK(&ip_conntrack_lock);
	return NF_DROP;
}
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	READ_LOCK(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	READ_UNLOCK(&ip_conntrack_lock);

	return h != NULL;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
struct ip_conntrack *
icmp_error_track(struct sk_buff *skb,
		 enum ip_conntrack_info *ctinfo,
		 unsigned int hooknum)
{
	struct ip_conntrack_tuple innertuple, origtuple;
	struct {
		struct icmphdr icmp;
		struct iphdr ip;
	} inside;
	struct ip_conntrack_protocol *innerproto;
	struct ip_conntrack_tuple_hash *h;
	int dataoff;

	IP_NF_ASSERT(skb->nfct == NULL);

	/* Not enough header? */
	if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
		return NULL;

	if (inside.icmp.type != ICMP_DEST_UNREACH
	    && inside.icmp.type != ICMP_SOURCE_QUENCH
	    && inside.icmp.type != ICMP_TIME_EXCEEDED
	    && inside.icmp.type != ICMP_PARAMETERPROB
	    && inside.icmp.type != ICMP_REDIRECT)
		return NULL;

	/* Ignore ICMP's containing fragments (shouldn't happen) */
	if (inside.ip.frag_off & htons(IP_OFFSET)) {
		DEBUGP("icmp_error_track: fragment of proto %u\n",
		       inside.ip.protocol);
		return NULL;
	}

	innerproto = ip_ct_find_proto(inside.ip.protocol);
	dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
	/* Are they talking about one of our connections? */
	if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
		DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
		return NULL;
	}

	/* Ordinarily, we'd expect the inverted tupleproto, but it's
	   been preserved inside the ICMP. */
	if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
		DEBUGP("icmp_error_track: Can't invert tuple\n");
		return NULL;
	}

	*ctinfo = IP_CT_RELATED;

	h = ip_conntrack_find_get(&innertuple, NULL);
	if (!h) {
		/* Locally generated ICMPs will match inverted if they
		   haven't been SNAT'ed yet */
		/* FIXME: NAT code has to handle half-done double NAT --RR */
		if (hooknum == NF_IP_LOCAL_OUT)
			h = ip_conntrack_find_get(&origtuple, NULL);

		if (!h) {
			DEBUGP("icmp_error_track: no match\n");
			return NULL;
		}
		/* Reverse direction from that found */
		if (DIRECTION(h) != IP_CT_DIR_REPLY)
			*ctinfo += IP_CT_IS_REPLY;
	} else {
		if (DIRECTION(h) == IP_CT_DIR_REPLY)
			*ctinfo += IP_CT_IS_REPLY;
	}

	/* Update skb to refer to this connection */
	skb->nfct = &h->ctrack->infos[*ctinfo];
	return h->ctrack;
}
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
	return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
}
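/* Drop one unassured conntrack from the given hash chain to relieve
   table pressure; returns 1 if a connection was evicted. */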
static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	int dropped = 0;

	READ_LOCK(&ip_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	if (!h)
		return dropped;

	if (del_timer(&h->ctrack->timeout)) {
		death_by_timeout((unsigned long)h->ctrack);
		dropped = 1;
	}
	ip_conntrack_put(h->ctrack);
	return dropped;
}
static inline int helper_cmp(const struct ip_conntrack_helper *i,
			     const struct ip_conntrack_tuple *rtuple)
{
	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct ip_conntrack_helper *,
			 tuple);
}
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	size_t hash;
	struct ip_conntrack_expect *expected;
	int i;
	static unsigned int drop_next;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	hash = hash_conntrack(tuple);

	if (ip_conntrack_max &&
	    atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		/* Try dropping from random chain, or else from the
		   chain about to put into (in case they're trying to
		   bomb one hash chain). */
		unsigned int next = (drop_next++)%ip_conntrack_htable_size;

		if (!early_drop(&ip_conntrack_hash[next])
		    && !early_drop(&ip_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	if (!invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(conntrack, 0, sizeof(*conntrack));
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
	conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
	for (i=0; i < IP_CT_NUMBER; i++)
		conntrack->infos[i].master = &conntrack->ct_general;

	if (!protocol->new(conntrack, skb)) {
		kmem_cache_free(ip_conntrack_cachep, conntrack);
		return NULL;
	}
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	INIT_LIST_HEAD(&conntrack->sibling_list);

	WRITE_LOCK(&ip_conntrack_lock);
	/* Need finding and deleting of expected ONLY if we win race */
	READ_LOCK(&ip_conntrack_expect_tuple_lock);
	expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
			     struct ip_conntrack_expect *, tuple);
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (expected && !is_confirmed(expected->expectant))
		expected = NULL;

	/* Look up the conntrack helper for master connections only */
	if (!expected)
		conntrack->helper = ip_ct_find_helper(&repl_tuple);

	/* If the expectation is dying, then this is a loser. */
	if (expected
	    && expected->expectant->helper->timeout
	    && ! del_timer(&expected->timeout))
		expected = NULL;

	if (expected) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
			conntrack, expected);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		IP_NF_ASSERT(master_ct(conntrack));
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = expected;
		expected->sibling = conntrack;
		LIST_DELETE(&ip_conntrack_expect_list, expected);
		expected->expectant->expecting--;
		nf_conntrack_get(&master_ct(conntrack)->infos[0]);
	}
	atomic_inc(&ip_conntrack_count);
	WRITE_UNLOCK(&ip_conntrack_lock);

	if (expected && expected->expectfn)
		expected->expectfn(conntrack);
	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &h->ctrack->infos[*ctinfo];
	return h->ctrack;
}
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply;
	int ret;

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

	/* FIXME: Do this right please. --RR */
	(*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct)
		return NF_ACCEPT;

	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

	/* It may be an icmp error... */
	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
	    && icmp_error_track(*pskb, &ctinfo, hooknum))
		return NF_ACCEPT;

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum, &ctinfo)))
		/* Not valid part of a connection */
		return NF_ACCEPT;

	if (IS_ERR(ct))
		/* Too stressed to deal. */
		return NF_DROP;

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret == -1) {
		/* Invalid */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		return NF_ACCEPT;
	}

	if (ret != NF_DROP && ct->helper) {
		ret = ct->helper->help(*pskb, ct, ctinfo);
		if (ret == -1) {
			/* Invalid */
			nf_conntrack_put((*pskb)->nfct);
			(*pskb)->nfct = NULL;
			return NF_ACCEPT;
		}
	}
	if (set_reply)
		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

	return ret;
}
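/* Invert a tuple, using the protocol handler looked up from the
   tuple's own protocol number. */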
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return invert_tuple(inverse, orig,
			    ip_ct_find_proto(orig->dst.protonum));
}
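/* Does the new expectation match one we have already seen, i.e. did a
   helper re-send it for the same connection? */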
static inline int resent_expect(const struct ip_conntrack_expect *i,
				const struct ip_conntrack_tuple *tuple,
				const struct ip_conntrack_tuple *mask)
{
	DEBUGP("resent_expect\n");
	DEBUGP("   tuple:   "); DUMP_TUPLE(&i->tuple);
	DEBUGP("ct_tuple:   "); DUMP_TUPLE(&i->ct_tuple);
	DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
	return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
		 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
		&& ip_ct_tuple_equal(&i->mask, mask));
}
/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *i,
			       const struct ip_conntrack_tuple *tuple,
			       const struct ip_conntrack_tuple *mask)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { i->mask.src.ip & mask->src.ip,
		      { i->mask.src.u.all & mask->src.u.all } },
		    { i->mask.dst.ip & mask->dst.ip,
		      { i->mask.dst.u.all & mask->dst.u.all },
		      i->mask.dst.protonum & mask->dst.protonum } };

	return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
}
inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
{
	WRITE_LOCK(&ip_conntrack_lock);
	unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}
static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *expect = (void *)ul_expect;

	DEBUGP("expectation %p timed out\n", expect);
	WRITE_LOCK(&ip_conntrack_lock);
	__unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}
struct ip_conntrack_expect *
ip_conntrack_expect_alloc(void)
{
	struct ip_conntrack_expect *new;

	new = (struct ip_conntrack_expect *)
		kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}

	/* tuple_cmp compares the whole union, so we have to initialize it
	   cleanly */
	memset(new, 0, sizeof(struct ip_conntrack_expect));

	return new;
}
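/* Typical helper usage is roughly the following (a sketch only; the
 * tuple and mask contents depend on the helper):
 *
 *	struct ip_conntrack_expect *exp = ip_conntrack_expect_alloc();
 *	if (!exp)
 *		return NF_DROP;
 *	exp->tuple = ...;	(expected tuple of the related flow)
 *	exp->mask = ...;	(which tuple fields must match)
 *	exp->expectfn = NULL;
 *	ip_conntrack_expect_related(exp, ct);
 */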
static void
ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
			   struct ip_conntrack *related_to)
{
	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
	new->expectant = related_to;
	new->sibling = NULL;
	atomic_set(&new->use, 1);

	/* add to expected list for this connection */
	list_add(&new->expected_list, &related_to->sibling_list);
	/* add to global list of expectations */
	list_prepend(&ip_conntrack_expect_list, &new->list);
	/* add and start timer if required */
	if (related_to->helper->timeout) {
		init_timer(&new->timeout);
		new->timeout.data = (unsigned long)new;
		new->timeout.function = expectation_timed_out;
		new->timeout.expires = jiffies +
					related_to->helper->timeout * HZ;
		add_timer(&new->timeout);
	}
	related_to->expecting++;
}
/* Add a related connection. */
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
				struct ip_conntrack *related_to)
{
	struct ip_conntrack_expect *old;
	int ret = 0;

	WRITE_LOCK(&ip_conntrack_lock);
	/* Because of the write lock, no reader can walk the lists,
	 * so there is no need to use the tuple lock too */

	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
			struct ip_conntrack_expect *, &expect->tuple,
			&expect->mask);
	if (old) {
		/* Helper private data may contain offsets but no pointers
		   pointing into the payload - otherwise we should have to copy
		   the data filled out by the helper over the old one */
		DEBUGP("expect_related: resent packet\n");
		if (related_to->helper->timeout) {
			if (!del_timer(&old->timeout)) {
				/* expectation is dying. Fall through */
				old = NULL;
			} else {
				old->timeout.expires = jiffies +
					related_to->helper->timeout * HZ;
				add_timer(&old->timeout);
			}
		}

		if (old) {
			WRITE_UNLOCK(&ip_conntrack_lock);
			kfree(expect);
			return -EEXIST;
		}
	} else if (related_to->helper->max_expected &&
		   related_to->expecting >= related_to->helper->max_expected) {
		struct list_head *cur_item;
		/* old == NULL */
		if (!(related_to->helper->flags &
		      IP_CT_HELPER_F_REUSE_EXPECT)) {
			WRITE_UNLOCK(&ip_conntrack_lock);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: max number of expected "
				       "connections %i of %s reached for "
				       "%u.%u.%u.%u->%u.%u.%u.%u\n",
				       related_to->helper->max_expected,
				       related_to->helper->name,
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
			kfree(expect);
			return -EPERM;
		}
		DEBUGP("ip_conntrack: max number of expected "
		       "connections %i of %s reached for "
		       "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
		       related_to->helper->max_expected,
		       related_to->helper->name,
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));

		/* choose the oldest expectation to evict */
		list_for_each(cur_item, &related_to->sibling_list) {
			struct ip_conntrack_expect *cur;

			cur = list_entry(cur_item,
					 struct ip_conntrack_expect,
					 expected_list);
			if (cur->sibling == NULL) {
				old = cur;
				break;
			}
		}

		/* (!old) cannot happen, since related_to->expecting is the
		 * number of unconfirmed expects */
		IP_NF_ASSERT(old);

		/* newnat14 does not reuse the real allocated memory
		 * structures but rather unexpects the old and
		 * allocates a new.  unexpect_related will decrement
		 * related_to->expecting.
		 */
		unexpect_related(old);
		ret = -EPERM;
	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
			     struct ip_conntrack_expect *, &expect->tuple,
			     &expect->mask)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_related: busy!\n");

		kfree(expect);
		return -EBUSY;
	}

out:	ip_conntrack_expect_insert(expect, related_to);

	WRITE_UNLOCK(&ip_conntrack_lock);

	return ret;
}
/* Change tuple in an existing expectation */
int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
			       struct ip_conntrack_tuple *newtuple)
{
	int ret;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);

	DEBUGP("change_expect:\n");
	DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("exp mask:  "); DUMP_TUPLE(&expect->mask);
	DEBUGP("newtuple:  "); DUMP_TUPLE(newtuple);
	if (expect->ct_tuple.dst.protonum == 0) {
		/* Never seen before */
		DEBUGP("change expect: never seen before\n");
		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
				 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
			/* Force NAT to find an unused tuple */
			ret = -1;
		} else {
			memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
			memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
			ret = 0;
		}
	} else {
		/* Resent packet */
		DEBUGP("change expect: resent packet\n");
		if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
			ret = 0;
		} else {
			/* Force NAT to choose again the same port */
			ret = -1;
		}
	}
	WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);

	return ret;
}
/* Alter reply tuple (maybe alter helper).  If it's already taken,
   return 0 and don't do alteration. */
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			     const struct ip_conntrack_tuple *newreply)
{
	WRITE_LOCK(&ip_conntrack_lock);
	if (__ip_conntrack_find(newreply, conntrack)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		return 0;
	}
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && list_empty(&conntrack->sibling_list))
		conntrack->helper = ip_ct_find_helper(newreply);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 1;
}
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	WRITE_LOCK(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 0;
}
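/* Iterator callback: detach the given helper from a conntrack that
   still uses it, removing any unconfirmed expectations first. */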
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (i->ctrack->helper == me) {
		/* Get rid of any expected. */
		remove_expectations(i->ctrack, 0);
		/* And *then* set helper to NULL */
		i->ctrack->helper = NULL;
	}
	return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;

	/* Need write lock here, to delete helper. */
	WRITE_LOCK(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expecteds, set helpers to NULL. */
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	/* Someone could still be looking at the helper in a bh. */
	synchronize_net();
}
/* Refresh conntrack for this many jiffies. */
void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
{
	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct))
		ct->timeout.expires = extra_jiffies;
	else {
		WRITE_LOCK(&ip_conntrack_lock);
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
		}
		WRITE_UNLOCK(&ip_conntrack_lock);
	}
}
/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG
	unsigned int olddebug = skb->nf_debug;
#endif

	if (sk) {
		sock_hold(sk);
		skb_orphan(skb);
	}

	local_bh_disable();
	skb = ip_defrag(skb);
	local_bh_enable();

	if (!skb) {
		if (sk)
			sock_put(sk);
		return skb;
	}

	if (sk) {
		skb_set_owner_w(skb, sk);
		sock_put(sk);
	}

	ip_send_check(skb->nh.iph);
	skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
	/* Packet path as if nothing had happened. */
	skb->nf_debug = olddebug;
#endif

	return skb;
}
/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = __ip_conntrack_get(nfct, &ctinfo);

	/* This ICMP is in reverse direction to the packet which
	   caused it */
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach new skbuff, and increment count */
	nskb->nfct = &ct->infos[ctinfo];
	atomic_inc(&ct->ct_general.use);
}
static inline int
do_kill(const struct ip_conntrack_tuple_hash *i,
	int (*kill)(const struct ip_conntrack *i, void *data),
	void *data)
{
	return kill(i->ctrack, data);
}
/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h = NULL;

	READ_LOCK(&ip_conntrack_lock);
	for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
			      struct ip_conntrack_tuple_hash *, kill, data);
	}
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}
void
ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
			void *data)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&h->ctrack->timeout))
			death_by_timeout((unsigned long)h->ctrack);
		/* ... else the timer will get him soon. */

		ip_conntrack_put(h->ctrack);
	}
}
/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_opt *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;

		sin.sin_family = AF_INET;
		sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(h->ctrack);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}
static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};
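/* Unconditional match: used to flush the entire table on cleanup. */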
static int kill_all(const struct ip_conntrack *i, void *data)
{
	return 1;
}
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	ip_ct_attach = NULL;
	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

 i_see_dead_people:
	ip_ct_selective_cleanup(kill_all, NULL);
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}

	kmem_cache_destroy(ip_conntrack_cachep);
	vfree(ip_conntrack_hash);
	nf_unregister_sockopt(&so_getorigdst);
}
static int hashsize;
MODULE_PARM(hashsize, "i");
int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (hashsize) {
		ip_conntrack_htable_size = hashsize;
	} else {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;
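	/* Worked example (i386, 4kB pages, hashsize unset): a 64MB machine
	   gets (64MB / 16384) / sizeof(struct list_head) = 4096/8 = 512
	   buckets, hence 8 * 512 = 4096 tracked connections maximum. */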
	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = vmalloc(sizeof(struct list_head)
				    * ip_conntrack_htable_size);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}
	/* Don't NEED lock here, but good form anyway. */
	WRITE_LOCK(&ip_conntrack_lock);
	/* Sew in builtin protocols. */
	list_append(&protocol_list, &ip_conntrack_protocol_tcp);
	list_append(&protocol_list, &ip_conntrack_protocol_udp);
	list_append(&protocol_list, &ip_conntrack_protocol_icmp);
	WRITE_UNLOCK(&ip_conntrack_lock);

	for (i = 0; i < ip_conntrack_htable_size; i++)
		INIT_LIST_HEAD(&ip_conntrack_hash[i]);

	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
	/*  - and prepare the ctinfo field for REJECT & NAT. */
	ip_conntrack_untracked.infos[IP_CT_NEW].master =
	ip_conntrack_untracked.infos[IP_CT_RELATED].master =
	ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
			&ip_conntrack_untracked.ct_general;

	return ret;

err_free_hash:
	vfree(ip_conntrack_hash);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}