1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
31 #include <linux/stddef.h>
32 #include <linux/sysctl.h>
33 #include <linux/slab.h>
34 #include <linux/random.h>
35 #include <linux/jhash.h>
36 /* For ERR_PTR(). Yeah, I know... --RR */
39 /* This rwlock protects the main hash table, protocol/helper/expected
40 registrations, conntrack timers*/
/* Lock-assertion helpers; expand to debug checks only when lock debugging is compiled in. */
41 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
42 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
44 #include <linux/netfilter_ipv4/ip_conntrack.h>
45 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
47 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
48 #include <linux/netfilter_ipv4/listhelp.h>
50 #define IP_CONNTRACK_VERSION "2.1"
/* Debug printout stub: compiled out (empty expansion) in this configuration. */
55 #define DEBUGP(format, args...)
58 DECLARE_RWLOCK(ip_conntrack_lock);
59 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* Optional callback invoked when a conntrack entry is destroyed (used by the NAT layer). */
61 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
62 LIST_HEAD(ip_conntrack_expect_list);
63 LIST_HEAD(protocol_list);
64 static LIST_HEAD(helpers);
/* Hash table size in buckets; 0 until set up at module init. */
65 unsigned int ip_conntrack_htable_size = 0;
/* Number of live conntrack entries (compared against ip_conntrack_max). */
67 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
68 struct list_head *ip_conntrack_hash;
69 static kmem_cache_t *ip_conntrack_cachep;
/* Dummy conntrack for packets marked "untracked". */
70 struct ip_conntrack ip_conntrack_untracked;
72 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
/* List predicate: true when this registered protocol handler matches `protocol`. */
74 static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
77 return protocol == curr->proto;
/* Look up a protocol handler; caller must hold ip_conntrack_lock for reading.
 * Falls back to the generic handler when no specific one is registered. */
80 struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
82 struct ip_conntrack_protocol *p;
84 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
85 p = LIST_FIND(&protocol_list, proto_cmpfn,
86 struct ip_conntrack_protocol *, protocol);
88 p = &ip_conntrack_generic_protocol;
/* Locking wrapper around __ip_ct_find_proto(). */
93 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
95 struct ip_conntrack_protocol *p;
97 READ_LOCK(&ip_conntrack_lock);
98 p = __ip_ct_find_proto(protocol);
99 READ_UNLOCK(&ip_conntrack_lock);
/* Drop a reference to a conntrack entry. */
104 ip_conntrack_put(struct ip_conntrack *ct)
107 IP_NF_ASSERT(ct->infos[0].master);
108 /* nf_conntrack_put wants to go via an info struct, so feed it
110 nf_conntrack_put(&ct->infos[0]);
113 static int ip_conntrack_hash_rnd_initted;
114 static unsigned int ip_conntrack_hash_rnd;
/* Hash a tuple into a bucket index; the random seed defends against
 * attacker-chosen hash collisions. */
117 hash_conntrack(const struct ip_conntrack_tuple *tuple)
122 return (jhash_3words(tuple->src.ip,
123 (tuple->dst.ip ^ tuple->dst.protonum),
124 (tuple->src.u.all | (tuple->dst.u.all << 16)),
125 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
/* Fill *tuple from the IP header plus the protocol-specific part of the
 * packet at `dataoff`. Refuses non-head fragments (no L4 header present). */
129 get_tuple(const struct iphdr *iph,
130 const struct sk_buff *skb,
131 unsigned int dataoff,
132 struct ip_conntrack_tuple *tuple,
133 const struct ip_conntrack_protocol *protocol)
136 if (iph->frag_off & htons(IP_OFFSET)) {
137 printk("ip_conntrack_core: Frag of proto %u.\n",
142 tuple->src.ip = iph->saddr;
143 tuple->dst.ip = iph->daddr;
144 tuple->dst.protonum = iph->protocol;
/* Protocol handler fills in the L4 part (ports, icmp id, ...). */
146 return protocol->pkt_to_tuple(skb, dataoff, tuple);
/* Build the inverse (reply-direction) tuple of `orig` into `inverse`. */
150 invert_tuple(struct ip_conntrack_tuple *inverse,
151 const struct ip_conntrack_tuple *orig,
152 const struct ip_conntrack_protocol *protocol)
154 inverse->src.ip = orig->dst.ip;
155 inverse->dst.ip = orig->src.ip;
156 inverse->dst.protonum = orig->dst.protonum;
158 return protocol->invert_tuple(inverse, orig);
162 /* ip_conntrack_expect helper functions */
164 /* Compare tuple parts depending on mask. */
165 static inline int expect_cmp(const struct ip_conntrack_expect *i,
166 const struct ip_conntrack_tuple *tuple)
168 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
169 return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
/* Final teardown of an expectation; must only run once the refcount has
 * dropped and its timer is no longer pending. */
173 destroy_expect(struct ip_conntrack_expect *exp)
175 DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
176 IP_NF_ASSERT(atomic_read(&exp->use));
177 IP_NF_ASSERT(!timer_pending(&exp->timeout));
/* Drop a reference to an expectation; frees it when the count hits zero. */
183 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
187 if (atomic_dec_and_test(&exp->use)) {
188 /* usage count dropped to zero */
/* Unlocked expectation lookup; caller must hold both ip_conntrack_lock and
 * ip_conntrack_expect_tuple_lock for reading. */
193 static inline struct ip_conntrack_expect *
194 __ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
196 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
197 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
198 return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
199 struct ip_conntrack_expect *, tuple);
202 /* Find a expectation corresponding to a tuple. */
/* On success the returned expectation carries an extra reference; the caller
 * must release it with ip_conntrack_expect_put(). */
203 struct ip_conntrack_expect *
204 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
206 struct ip_conntrack_expect *exp;
208 READ_LOCK(&ip_conntrack_lock);
209 READ_LOCK(&ip_conntrack_expect_tuple_lock);
210 exp = __ip_ct_expect_find(tuple);
212 atomic_inc(&exp->use);
213 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
214 READ_UNLOCK(&ip_conntrack_lock);
219 /* remove one specific expectation from all lists and drop refcount,
220 * does _NOT_ delete the timer. */
221 static void __unexpect_related(struct ip_conntrack_expect *expect)
223 DEBUGP("unexpect_related(%p)\n", expect);
224 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
226 /* we're not allowed to unexpect a confirmed expectation! */
227 IP_NF_ASSERT(!expect->sibling);
229 /* delete from global and local lists */
230 list_del(&expect->list);
231 list_del(&expect->expected_list);
233 /* decrement expect-count of master conntrack */
234 if (expect->expectant)
235 expect->expectant->expecting--;
237 ip_conntrack_expect_put(expect);
240 /* remove one specific expectation from all lists, drop refcount
242 * This function can _NOT_ be called for confirmed expects! */
243 static void unexpect_related(struct ip_conntrack_expect *expect)
245 IP_NF_ASSERT(expect->expectant);
246 IP_NF_ASSERT(expect->expectant->helper);
247 /* if we are supposed to have a timer, but we can't delete
248 * it: race condition. __unexpect_related will
249 * be called by the timeout function */
250 if (expect->expectant->helper->timeout
251 && !del_timer(&expect->timeout))
254 __unexpect_related(expect);
257 /* delete all unconfirmed expectations for this conntrack */
/* If drop_refcount is set, the master references held by established
 * siblings are dropped as well — NOTE(review): exact use of drop_refcount
 * is partly elided here; confirm against full source. */
258 static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
260 struct list_head *exp_entry, *next;
261 struct ip_conntrack_expect *exp;
263 DEBUGP("remove_expectations(%p)\n", ct);
265 list_for_each_safe(exp_entry, next, &ct->sibling_list) {
266 exp = list_entry(exp_entry, struct ip_conntrack_expect,
269 /* we skip established expectations, as we want to delete
270 * the un-established ones only */
272 DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
274 /* Indicate that this expectations parent is dead */
275 ip_conntrack_put(exp->expectant);
276 exp->expectant = NULL;
281 IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
282 IP_NF_ASSERT(exp->expectant == ct);
284 /* delete expectation from global and private lists */
285 unexpect_related(exp);
/* Unhash both directions of a conntrack and kill its pending expectations.
 * Caller holds ip_conntrack_lock for writing. */
290 clean_from_lists(struct ip_conntrack *ct)
294 DEBUGP("clean_from_lists(%p)\n", ct);
295 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
297 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
298 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
299 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
300 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
302 /* Destroy all un-established, pending expectations */
303 remove_expectations(ct, 1);
/* nf_conntrack destructor: runs when the last reference is dropped.
 * Releases protocol-private state, the master reference and the slab object. */
307 destroy_conntrack(struct nf_conntrack *nfct)
309 struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
310 struct ip_conntrack_protocol *proto;
312 DEBUGP("destroy_conntrack(%p)\n", ct);
313 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
314 IP_NF_ASSERT(!timer_pending(&ct->timeout));
316 /* To make sure we don't get any weird locking issues here:
317 * destroy_conntrack() MUST NOT be called with a write lock
318 * to ip_conntrack_lock!!! -HW */
319 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
320 if (proto && proto->destroy)
323 if (ip_conntrack_destroyed)
324 ip_conntrack_destroyed(ct);
326 WRITE_LOCK(&ip_conntrack_lock);
327 /* Delete us from our own list to prevent corruption later */
328 list_del(&ct->sibling_list);
330 /* Delete our master expectation */
332 if (ct->master->expectant) {
333 /* can't call __unexpect_related here,
334 * since it would screw up expect_list */
335 list_del(&ct->master->expected_list);
336 master = ct->master->expectant;
340 WRITE_UNLOCK(&ip_conntrack_lock);
/* Drop the master reference outside the lock (may recurse into destroy). */
343 ip_conntrack_put(master);
345 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
346 kmem_cache_free(ip_conntrack_cachep, ct);
347 atomic_dec(&ip_conntrack_count);
/* Timer callback: the conntrack's timeout expired — unhash it and drop
 * the hash table's reference. */
350 static void death_by_timeout(unsigned long ul_conntrack)
352 struct ip_conntrack *ct = (void *)ul_conntrack;
354 WRITE_LOCK(&ip_conntrack_lock);
355 clean_from_lists(ct);
356 WRITE_UNLOCK(&ip_conntrack_lock);
357 ip_conntrack_put(ct);
/* Hash-chain predicate: tuple matches and the entry is not the one we were
 * asked to ignore (used when checking for clashes against ourselves). */
361 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
362 const struct ip_conntrack_tuple *tuple,
363 const struct ip_conntrack *ignored_conntrack)
365 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
366 return i->ctrack != ignored_conntrack
367 && ip_ct_tuple_equal(tuple, &i->tuple);
/* Unlocked hash lookup; caller must hold ip_conntrack_lock for reading. */
370 static struct ip_conntrack_tuple_hash *
371 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
372 const struct ip_conntrack *ignored_conntrack)
374 struct ip_conntrack_tuple_hash *h;
375 unsigned int hash = hash_conntrack(tuple);
377 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
378 h = LIST_FIND(&ip_conntrack_hash[hash],
380 struct ip_conntrack_tuple_hash *,
381 tuple, ignored_conntrack);
385 /* Find a connection corresponding to a tuple. */
/* On success the conntrack's refcount is bumped; caller must put it. */
386 struct ip_conntrack_tuple_hash *
387 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
388 const struct ip_conntrack *ignored_conntrack)
390 struct ip_conntrack_tuple_hash *h;
392 READ_LOCK(&ip_conntrack_lock);
393 h = __ip_conntrack_find(tuple, ignored_conntrack);
395 atomic_inc(&h->ctrack->ct_general.use);
396 READ_UNLOCK(&ip_conntrack_lock);
/* Recover the conntrack and packet-relation info from an nf_ct_info pointer. */
401 static inline struct ip_conntrack *
402 __ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
404 struct ip_conntrack *ct
405 = (struct ip_conntrack *)nfct->master;
407 /* ctinfo is the index of the nfct inside the conntrack */
408 *ctinfo = nfct - ct->infos;
409 IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
413 /* Return conntrack and conntrack_info given skb->nfct->master */
414 struct ip_conntrack *
415 ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
418 return __ip_conntrack_get(skb->nfct, ctinfo);
422 /* Confirm a connection given skb->nfct; places it in hash table */
/* Called once the first packet of a new connection has traversed all hooks
 * without being dropped. Inserts both tuple directions into the hash table
 * unless another CPU won the race. */
424 __ip_conntrack_confirm(struct nf_ct_info *nfct)
426 unsigned int hash, repl_hash;
427 struct ip_conntrack *ct;
428 enum ip_conntrack_info ctinfo;
430 ct = __ip_conntrack_get(nfct, &ctinfo);
432 /* ipt_REJECT uses ip_conntrack_attach to attach related
433 ICMP/TCP RST packets in other direction. Actual packet
434 which created connection will be IP_CT_NEW or for an
435 expected connection, IP_CT_RELATED. */
436 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
439 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
440 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
442 /* We're not in hash table, and we refuse to set up related
443 connections for unconfirmed conns. But packet copies and
444 REJECT will give spurious warnings here. */
445 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
447 /* No external references means noone else could have
449 IP_NF_ASSERT(!is_confirmed(ct));
450 DEBUGP("Confirming conntrack %p\n", ct);
452 WRITE_LOCK(&ip_conntrack_lock);
453 /* See if there's one in the list already, including reverse:
454 NAT could have grabbed it without realizing, since we're
455 not in the hash. If there is, we lost race. */
456 if (!LIST_FIND(&ip_conntrack_hash[hash],
458 struct ip_conntrack_tuple_hash *,
459 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
460 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
462 struct ip_conntrack_tuple_hash *,
463 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
464 list_prepend(&ip_conntrack_hash[hash],
465 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
466 list_prepend(&ip_conntrack_hash[repl_hash],
467 &ct->tuplehash[IP_CT_DIR_REPLY]);
468 /* Timer relative to confirmation time, not original
469 setting time, otherwise we'd get timer wrap in
470 weird delay cases. */
471 ct->timeout.expires += jiffies;
472 add_timer(&ct->timeout);
/* Extra reference is now held by the hash table itself. */
473 atomic_inc(&ct->ct_general.use);
474 set_bit(IPS_CONFIRMED_BIT, &ct->status);
475 WRITE_UNLOCK(&ip_conntrack_lock);
479 WRITE_UNLOCK(&ip_conntrack_lock);
483 /* Returns true if a connection correspondings to the tuple (required
/* Lookup-only variant used by NAT to test whether a tuple is in use. */
486 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
487 const struct ip_conntrack *ignored_conntrack)
489 struct ip_conntrack_tuple_hash *h;
491 READ_LOCK(&ip_conntrack_lock);
492 h = __ip_conntrack_find(tuple, ignored_conntrack);
493 READ_UNLOCK(&ip_conntrack_lock);
498 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
/* Track an ICMP error packet by looking up the connection the embedded
 * (offending) packet belongs to. Sets *ctinfo to IP_CT_RELATED, plus
 * IP_CT_IS_REPLY when the match was in the reply direction. */
499 struct ip_conntrack *
500 icmp_error_track(struct sk_buff *skb,
501 enum ip_conntrack_info *ctinfo,
502 unsigned int hooknum)
504 struct ip_conntrack_tuple innertuple, origtuple;
509 struct ip_conntrack_protocol *innerproto;
510 struct ip_conntrack_tuple_hash *h;
513 IP_NF_ASSERT(skb->nfct == NULL);
515 /* Not enough header? */
516 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
/* Only ICMP error types carry an embedded packet worth tracking. */
519 if (inside.icmp.type != ICMP_DEST_UNREACH
520 && inside.icmp.type != ICMP_SOURCE_QUENCH
521 && inside.icmp.type != ICMP_TIME_EXCEEDED
522 && inside.icmp.type != ICMP_PARAMETERPROB
523 && inside.icmp.type != ICMP_REDIRECT)
526 /* Ignore ICMP's containing fragments (shouldn't happen) */
527 if (inside.ip.frag_off & htons(IP_OFFSET)) {
528 DEBUGP("icmp_error_track: fragment of proto %u\n",
533 innerproto = ip_ct_find_proto(inside.ip.protocol);
534 dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
535 /* Are they talking about one of our connections? */
536 if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
537 DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
541 /* Ordinarily, we'd expect the inverted tupleproto, but it's
542 been preserved inside the ICMP. */
543 if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
544 DEBUGP("icmp_error_track: Can't invert tuple\n");
548 *ctinfo = IP_CT_RELATED;
550 h = ip_conntrack_find_get(&innertuple, NULL);
552 /* Locally generated ICMPs will match inverted if they
553 haven't been SNAT'ed yet */
554 /* FIXME: NAT code has to handle half-done double NAT --RR */
555 if (hooknum == NF_IP_LOCAL_OUT)
556 h = ip_conntrack_find_get(&origtuple, NULL);
559 DEBUGP("icmp_error_track: no match\n");
562 /* Reverse direction from that found */
/* BUGFIX: a duplicate `if (DIRECTION(h) == IP_CT_DIR_REPLY)` branch also
 * added IP_CT_IS_REPLY, so *ctinfo was bumped unconditionally regardless
 * of the matched direction. Only the != branch below is correct: the ICMP
 * error travels in the opposite direction of the matched tuple. */
563 if (DIRECTION(h) != IP_CT_DIR_REPLY)
564 *ctinfo += IP_CT_IS_REPLY;
570 /* Update skb to refer to this connection */
571 skb->nfct = &h->ctrack->infos[*ctinfo];
575 /* There's a small race here where we may free a just-assured
576 connection. Too bad: we're in trouble anyway. */
/* Eviction predicate: a conntrack that never got the ASSURED bit. */
577 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
579 return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
/* Table-full pressure valve: drop the oldest unassured entry on `chain`.
 * Returns nonzero on success — NOTE(review): return statements elided here. */
582 static int early_drop(struct list_head *chain)
584 /* Traverse backwards: gives us oldest, which is roughly LRU */
585 struct ip_conntrack_tuple_hash *h;
588 READ_LOCK(&ip_conntrack_lock);
589 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
591 atomic_inc(&h->ctrack->ct_general.use);
592 READ_UNLOCK(&ip_conntrack_lock);
/* Kill via the timeout path only if we beat the timer to it. */
597 if (del_timer(&h->ctrack->timeout)) {
598 death_by_timeout((unsigned long)h->ctrack);
601 ip_conntrack_put(h->ctrack);
/* Helper predicate: does this helper's tuple/mask match the reply tuple? */
605 static inline int helper_cmp(const struct ip_conntrack_helper *i,
606 const struct ip_conntrack_tuple *rtuple)
608 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
/* Find the registered helper (ftp, irc, ...) matching a tuple, if any. */
611 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
613 return LIST_FIND(&helpers, helper_cmp,
614 struct ip_conntrack_helper *,
618 /* Allocate a new conntrack: we return -ENOMEM if classification
619 failed due to stress. Otherwise it really is unclassifiable. */
620 static struct ip_conntrack_tuple_hash *
621 init_conntrack(const struct ip_conntrack_tuple *tuple,
622 struct ip_conntrack_protocol *protocol,
625 struct ip_conntrack *conntrack;
626 struct ip_conntrack_tuple repl_tuple;
628 struct ip_conntrack_expect *expected;
630 static unsigned int drop_next;
/* Lazily seed the hash on the first connection ever tracked. */
632 if (!ip_conntrack_hash_rnd_initted) {
633 get_random_bytes(&ip_conntrack_hash_rnd, 4);
634 ip_conntrack_hash_rnd_initted = 1;
637 hash = hash_conntrack(tuple);
639 if (ip_conntrack_max &&
640 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
641 /* Try dropping from random chain, or else from the
642 chain about to put into (in case they're trying to
643 bomb one hash chain). */
644 unsigned int next = (drop_next++)%ip_conntrack_htable_size;
646 if (!early_drop(&ip_conntrack_hash[next])
647 && !early_drop(&ip_conntrack_hash[hash])) {
650 "ip_conntrack: table full, dropping"
652 return ERR_PTR(-ENOMEM);
656 if (!invert_tuple(&repl_tuple, tuple, protocol)) {
657 DEBUGP("Can't invert tuple.\n");
661 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
663 DEBUGP("Can't allocate conntrack.\n");
664 return ERR_PTR(-ENOMEM);
667 memset(conntrack, 0, sizeof(*conntrack));
668 atomic_set(&conntrack->ct_general.use, 1);
669 conntrack->ct_general.destroy = destroy_conntrack;
670 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
671 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
672 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
673 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
/* All per-ctinfo info structs point back at this conntrack. */
674 for (i=0; i < IP_CT_NUMBER; i++)
675 conntrack->infos[i].master = &conntrack->ct_general;
/* Let the L4 protocol initialise its private state; bail if it refuses. */
677 if (!protocol->new(conntrack, skb)) {
678 kmem_cache_free(ip_conntrack_cachep, conntrack);
681 /* Don't set timer yet: wait for confirmation */
682 init_timer(&conntrack->timeout);
683 conntrack->timeout.data = (unsigned long)conntrack;
684 conntrack->timeout.function = death_by_timeout;
686 INIT_LIST_HEAD(&conntrack->sibling_list);
688 WRITE_LOCK(&ip_conntrack_lock);
689 /* Need finding and deleting of expected ONLY if we win race */
690 READ_LOCK(&ip_conntrack_expect_tuple_lock);
691 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
692 struct ip_conntrack_expect *, tuple);
693 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
695 /* If master is not in hash table yet (ie. packet hasn't left
696 this machine yet), how can other end know about expected?
697 Hence these are not the droids you are looking for (if
698 master ct never got confirmed, we'd hold a reference to it
699 and weird things would happen to future packets). */
700 if (expected && !is_confirmed(expected->expectant))
703 /* Look up the conntrack helper for master connections only */
705 conntrack->helper = ip_ct_find_helper(&repl_tuple);
707 /* If the expectation is dying, then this is a loser. */
709 && expected->expectant->helper->timeout
710 && ! del_timer(&expected->timeout))
714 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
715 conntrack, expected);
716 /* Welcome, Mr. Bond. We've been expecting you... */
717 IP_NF_ASSERT(master_ct(conntrack));
718 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
719 conntrack->master = expected;
720 expected->sibling = conntrack;
721 LIST_DELETE(&ip_conntrack_expect_list, expected);
722 expected->expectant->expecting--;
/* Hold a reference to the master conntrack for our lifetime. */
723 nf_conntrack_get(&master_ct(conntrack)->infos[0]);
725 atomic_inc(&ip_conntrack_count);
726 WRITE_UNLOCK(&ip_conntrack_lock);
/* Run the expectation callback (e.g. NAT setup) outside the lock. */
728 if (expected && expected->expectfn)
729 expected->expectfn(conntrack);
730 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
733 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/* Look up (or create) the conntrack for a normal (non-ICMP-error) packet
 * and classify it as NEW / ESTABLISHED / RELATED, plus IS_REPLY. */
734 static inline struct ip_conntrack *
735 resolve_normal_ct(struct sk_buff *skb,
736 struct ip_conntrack_protocol *proto,
738 unsigned int hooknum,
739 enum ip_conntrack_info *ctinfo)
741 struct ip_conntrack_tuple tuple;
742 struct ip_conntrack_tuple_hash *h;
/* Fragments must have been reassembled before we get here. */
744 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
746 if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
749 /* look for tuple match */
750 h = ip_conntrack_find_get(&tuple, NULL);
752 h = init_conntrack(&tuple, proto, skb);
759 /* It exists; we have (non-exclusive) reference. */
760 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
761 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
762 /* Please set reply bit if this packet OK */
765 /* Once we've had two way comms, always ESTABLISHED. */
766 if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
767 DEBUGP("ip_conntrack_in: normal packet for %p\n",
769 *ctinfo = IP_CT_ESTABLISHED;
770 } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
771 DEBUGP("ip_conntrack_in: related packet for %p\n",
773 *ctinfo = IP_CT_RELATED;
775 DEBUGP("ip_conntrack_in: new packet for %p\n",
781 skb->nfct = &h->ctrack->infos[*ctinfo];
785 /* Netfilter hook itself. */
/* Main conntrack entry point for every IPv4 packet at PRE_ROUTING /
 * LOCAL_OUT. Resolves the connection, runs the protocol state machine and
 * any helper, and returns an NF_* verdict. */
786 unsigned int ip_conntrack_in(unsigned int hooknum,
787 struct sk_buff **pskb,
788 const struct net_device *in,
789 const struct net_device *out,
790 int (*okfn)(struct sk_buff *))
792 struct ip_conntrack *ct;
793 enum ip_conntrack_info ctinfo;
794 struct ip_conntrack_protocol *proto;
/* Fragments should not reach us; defrag happens earlier. */
799 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
800 if (net_ratelimit()) {
801 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
802 (*pskb)->nh.iph->protocol, hooknum);
807 /* FIXME: Do this right please. --RR */
808 (*pskb)->nfcache |= NFC_UNKNOWN;
810 /* Doesn't cover locally-generated broadcast, so not worth it. */
812 /* Ignore broadcast: no `connection'. */
813 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
814 printk("Broadcast packet!\n");
816 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
817 == htonl(0x000000FF)) {
818 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
819 NIPQUAD((*pskb)->nh.iph->saddr),
820 NIPQUAD((*pskb)->nh.iph->daddr),
821 (*pskb)->sk, (*pskb)->pkt_type);
825 /* Previously seen (loopback or untracked)? Ignore. */
829 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
831 /* It may be an icmp error... */
832 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
833 && icmp_error_track(*pskb, &ctinfo, hooknum))
836 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
837 /* Not valid part of a connection */
841 /* Too stressed to deal. */
844 IP_NF_ASSERT((*pskb)->nfct);
/* L4 state machine decides the verdict; negative means invalid. */
846 ret = proto->packet(ct, *pskb, ctinfo);
849 nf_conntrack_put((*pskb)->nfct);
850 (*pskb)->nfct = NULL;
/* Give the application helper (ftp, irc, ...) a chance to veto/expect. */
854 if (ret != NF_DROP && ct->helper) {
855 ret = ct->helper->help(*pskb, ct, ctinfo);
858 nf_conntrack_put((*pskb)->nfct);
859 (*pskb)->nfct = NULL;
864 set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
/* Invert a tuple using the registered protocol handler for its protonum. */
869 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
870 const struct ip_conntrack_tuple *orig)
872 return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
/* Predicate: is this expectation a resend of the same expect (same tuple
 * and mask, matching either the original or the NAT-changed ct_tuple)? */
875 static inline int resent_expect(const struct ip_conntrack_expect *i,
876 const struct ip_conntrack_tuple *tuple,
877 const struct ip_conntrack_tuple *mask)
879 DEBUGP("resent_expect\n");
880 DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
881 DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
882 DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
883 return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
884 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
885 && ip_ct_tuple_equal(&i->mask, mask));
888 /* Would two expected things clash? */
889 static inline int expect_clash(const struct ip_conntrack_expect *i,
890 const struct ip_conntrack_tuple *tuple,
891 const struct ip_conntrack_tuple *mask)
893 /* Part covered by intersection of masks must be unequal,
894 otherwise they clash */
895 struct ip_conntrack_tuple intersect_mask
896 = { { i->mask.src.ip & mask->src.ip,
897 { i->mask.src.u.all & mask->src.u.all } },
898 { i->mask.dst.ip & mask->dst.ip,
899 { i->mask.dst.u.all & mask->dst.u.all },
900 i->mask.dst.protonum & mask->dst.protonum } };
902 return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
/* Public locking wrapper around unexpect_related(). */
905 inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
907 WRITE_LOCK(&ip_conntrack_lock);
908 unexpect_related(expect);
909 WRITE_UNLOCK(&ip_conntrack_lock);
/* Timer callback: an expectation's lifetime expired before being used. */
912 static void expectation_timed_out(unsigned long ul_expect)
914 struct ip_conntrack_expect *expect = (void *) ul_expect;
916 DEBUGP("expectation %p timed out\n", expect);
917 WRITE_LOCK(&ip_conntrack_lock);
918 __unexpect_related(expect);
919 WRITE_UNLOCK(&ip_conntrack_lock);
/* Allocate a zeroed expectation (GFP_ATOMIC); returns NULL on OOM. */
922 struct ip_conntrack_expect *
923 ip_conntrack_expect_alloc()
925 struct ip_conntrack_expect *new;
927 new = (struct ip_conntrack_expect *)
928 kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
930 DEBUGP("expect_related: OOM allocating expect\n");
934 /* tuple_cmp compares whole union, we have to initialized cleanly */
935 memset(new, 0, sizeof(struct ip_conntrack_expect));
/* Attach a freshly-built expectation to its master conntrack, add it to
 * the global expect list, and arm its timeout if the helper defines one.
 * Caller holds ip_conntrack_lock for writing. */
941 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
942 struct ip_conntrack *related_to)
944 DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
945 new->expectant = related_to;
947 atomic_set(&new->use, 1);
949 /* add to expected list for this connection */
950 list_add(&new->expected_list, &related_to->sibling_list);
951 /* add to global list of expectations */
953 list_prepend(&ip_conntrack_expect_list, &new->list);
954 /* add and start timer if required */
955 if (related_to->helper->timeout) {
956 init_timer(&new->timeout);
957 new->timeout.data = (unsigned long)new;
958 new->timeout.function = expectation_timed_out;
959 new->timeout.expires = jiffies +
960 related_to->helper->timeout * HZ;
961 add_timer(&new->timeout);
963 related_to->expecting++;
966 /* Add a related connection. */
/* Register an expectation on behalf of a helper. Handles three cases:
 * a resent identical expect (just re-arm its timer), the per-conntrack
 * expectation limit (optionally evicting the oldest), and tuple clashes. */
967 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
968 struct ip_conntrack *related_to)
970 struct ip_conntrack_expect *old;
973 WRITE_LOCK(&ip_conntrack_lock);
974 /* Because of the write lock, no reader can walk the lists,
975 * so there is no need to use the tuple lock too */
977 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
978 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
979 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
981 old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
982 struct ip_conntrack_expect *, &expect->tuple,
985 /* Helper private data may contain offsets but no pointers
986 pointing into the payload - otherwise we should have to copy
987 the data filled out by the helper over the old one */
988 DEBUGP("expect_related: resent packet\n");
989 if (related_to->helper->timeout) {
990 if (!del_timer(&old->timeout)) {
991 /* expectation is dying. Fall through */
994 old->timeout.expires = jiffies +
995 related_to->helper->timeout * HZ;
996 add_timer(&old->timeout);
1000 WRITE_UNLOCK(&ip_conntrack_lock);
1004 } else if (related_to->helper->max_expected &&
1005 related_to->expecting >= related_to->helper->max_expected) {
1006 struct list_head *cur_item;
/* At the limit: either refuse, or evict the oldest if the helper
 * opted into reuse. */
1008 if (!(related_to->helper->flags &
1009 IP_CT_HELPER_F_REUSE_EXPECT)) {
1010 WRITE_UNLOCK(&ip_conntrack_lock);
1011 if (net_ratelimit())
1013 "ip_conntrack: max number of expected "
1014 "connections %i of %s reached for "
1015 "%u.%u.%u.%u->%u.%u.%u.%u\n",
1016 related_to->helper->max_expected,
1017 related_to->helper->name,
1018 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1019 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1023 DEBUGP("ip_conntrack: max number of expected "
1024 "connections %i of %s reached for "
1025 "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1026 related_to->helper->max_expected,
1027 related_to->helper->name,
1028 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1029 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1031 /* choose the oldest expectation to evict */
1032 list_for_each(cur_item, &related_to->sibling_list) {
1033 struct ip_conntrack_expect *cur;
1035 cur = list_entry(cur_item,
1036 struct ip_conntrack_expect,
1038 if (cur->sibling == NULL) {
1044 /* (!old) cannot happen, since related_to->expecting is the
1045 * number of unconfirmed expects */
1048 /* newnat14 does not reuse the real allocated memory
1049 * structures but rather unexpects the old and
1050 * allocates a new. unexpect_related will decrement
1051 * related_to->expecting.
1053 unexpect_related(old);
1055 } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1056 struct ip_conntrack_expect *, &expect->tuple,
1058 WRITE_UNLOCK(&ip_conntrack_lock);
1059 DEBUGP("expect_related: busy!\n");
1065 out: ip_conntrack_expect_insert(expect, related_to);
1067 WRITE_UNLOCK(&ip_conntrack_lock);
1072 /* Change tuple in an existing expectation */
/* Used by NAT to rewrite an expectation's tuple. The original tuple is
 * preserved in ct_tuple so a resent expect can still be recognised. */
1073 int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1074 struct ip_conntrack_tuple *newtuple)
1078 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1079 WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1081 DEBUGP("change_expect:\n");
1082 DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
1083 DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
1084 DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
1085 if (expect->ct_tuple.dst.protonum == 0) {
1086 /* Never seen before */
1087 DEBUGP("change expect: never seen before\n");
1088 if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
1089 && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1090 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1091 /* Force NAT to find an unused tuple */
/* Save the pre-NAT tuple, then install the new one. */
1094 memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1095 memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
1100 DEBUGP("change expect: resent packet\n");
1101 if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1104 /* Force NAT to choose again the same port */
1108 WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1113 /* Alter reply tuple (maybe alter helper). If it's already taken,
1114 return 0 and don't do alteration. */
1115 int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1116 const struct ip_conntrack_tuple *newreply)
1118 WRITE_LOCK(&ip_conntrack_lock);
1119 if (__ip_conntrack_find(newreply, conntrack)) {
1120 WRITE_UNLOCK(&ip_conntrack_lock);
1123 /* Should be unconfirmed, so not in hash table yet */
1124 IP_NF_ASSERT(!is_confirmed(conntrack));
1126 DEBUGP("Altering reply tuple of %p to ", conntrack);
1127 DUMP_TUPLE(newreply);
1129 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
/* Re-pick the helper from the new reply tuple, for master conns only. */
1130 if (!conntrack->master)
1131 conntrack->helper = LIST_FIND(&helpers, helper_cmp,
1132 struct ip_conntrack_helper *,
1134 WRITE_UNLOCK(&ip_conntrack_lock);
1139 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1141 WRITE_LOCK(&ip_conntrack_lock);
1142 list_prepend(&helpers, me);
1143 WRITE_UNLOCK(&ip_conntrack_lock);
/* Per-entry callback used while unregistering helper "me": if the hash
 * entry's conntrack still points at that helper, drop its pending
 * expectations and then clear the helper pointer (in that order, so no
 * expectation can outlive its helper).
 * NOTE(review): extract is gapped -- opening brace and the return value
 * (presumably 0, so the list walk visits every entry) are elided. */
1148 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1149 const struct ip_conntrack_helper *me)
1151 if (i->ctrack->helper == me) {
1152 /* Get rid of any expected. */
1153 remove_expectations(i->ctrack, 0);
1154 /* And *then* set helper to NULL */
1155 i->ctrack->helper = NULL;
/* Unregister a helper: remove it from the global list, then sweep every
 * hash bucket detaching it from live conntracks via unhelp().
 * NOTE(review): extract is gapped -- the opening brace, the declaration
 * of loop index "i", and the quiescence wait hinted at by the trailing
 * comment are elided. */
1160 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1164 /* Need write lock here, to delete helper. */
1165 WRITE_LOCK(&ip_conntrack_lock);
1166 LIST_DELETE(&helpers, me);
1168 /* Get rid of expecteds, set helpers to NULL. */
1169 for (i = 0; i < ip_conntrack_htable_size; i++)
1170 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1171 struct ip_conntrack_tuple_hash *, me);
1172 WRITE_UNLOCK(&ip_conntrack_lock);
1174 /* Someone could be still looking at the helper in a bh. */
1178 /* Refresh conntrack for this many jiffies. */
1179 void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
/* NOTE(review): extract is gapped -- the opening brace and the "else"
 * introducing the confirmed-path branch are elided. */
1181 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1183 /* If not in hash table, timer will not be active yet */
1184 if (!is_confirmed(ct))
/* Unconfirmed: store a RELATIVE expiry; presumably converted to an
 * absolute time when the conntrack is confirmed -- TODO confirm. */
1185 ct->timeout.expires = extra_jiffies;
1187 WRITE_LOCK(&ip_conntrack_lock);
1188 /* Need del_timer for race avoidance (may already be dying). */
/* Only re-arm if del_timer() actually removed a pending timer; if it
 * returns 0 the timeout handler is already running and the conntrack
 * is on its way out, so we must not resurrect it. */
1189 if (del_timer(&ct->timeout)) {
1190 ct->timeout.expires = jiffies + extra_jiffies;
1191 add_timer(&ct->timeout);
1193 WRITE_UNLOCK(&ip_conntrack_lock);
1197 /* Returns new sk_buff, or NULL */
/* Reassemble an IP fragment train into a single skb.
 * NOTE(review): extract is heavily gapped -- the return type line, the
 * opening brace, the NULL check after ip_defrag(), the guard around
 * skb_set_owner_w(), and the final "return skb;" are elided. */
1199 ip_ct_gather_frags(struct sk_buff *skb)
/* Remember the owning socket: ip_defrag() may hand back a different
 * skb, and ownership must be restored on the reassembled one. */
1201 struct sock *sk = skb->sk;
1202 #ifdef CONFIG_NETFILTER_DEBUG
1203 unsigned int olddebug = skb->nf_debug;
1211 skb = ip_defrag(skb);
1221 skb_set_owner_w(skb, sk);
/* Header changed during reassembly: recompute the IP checksum and flag
 * the packet as altered for the netfilter cache. */
1225 ip_send_check(skb->nh.iph);
1226 skb->nfcache |= NFC_ALTERED;
1227 #ifdef CONFIG_NETFILTER_DEBUG
1228 /* Packet path as if nothing had happened. */
1229 skb->nf_debug = olddebug;
1234 /* Used by ipt_REJECT. */
/* Attach the conntrack of an existing packet to a locally generated one
 * (e.g. an ICMP error from ipt_REJECT), marking it RELATED and flipping
 * the direction relative to the triggering packet.
 * NOTE(review): extract is gapped -- the opening brace, the "else" for
 * the plain RELATED case, and the closing brace are elided. */
1235 static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1237 struct ip_conntrack *ct;
1238 enum ip_conntrack_info ctinfo;
1240 ct = __ip_conntrack_get(nfct, &ctinfo);
1242 /* This ICMP is in reverse direction to the packet which
1244 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1245 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1247 ctinfo = IP_CT_RELATED;
1249 /* Attach new skbuff, and increment count */
/* Point nskb at the ctinfo-specific nf_ct_info slot and take a
 * reference so the conntrack outlives the new skb. */
1250 nskb->nfct = &ct->infos[ctinfo];
1251 atomic_inc(&ct->ct_general.use);
/* Adaptor for LIST_FIND: unwrap the hash entry and apply the caller's
 * kill() predicate to the underlying conntrack.
 * NOTE(review): extract is gapped -- the return-type line, the trailing
 * "void *data" parameter line, and the braces are elided. */
1255 do_kill(const struct ip_conntrack_tuple_hash *i,
1256 int (*kill)(const struct ip_conntrack *i, void *data),
1259 return kill(i->ctrack, data);
1262 /* Bring out ya dead! */
/* Scan the hash table starting at *bucket for the first conntrack
 * matching the kill() predicate; *bucket is advanced so successive
 * calls resume where the last one stopped.
 * NOTE(review): extract is gapped -- the opening brace, the "if (h)"
 * guard around the refcount bump, and "return h;" are elided. */
1263 static struct ip_conntrack_tuple_hash *
1264 get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1265 void *data, unsigned int *bucket)
1267 struct ip_conntrack_tuple_hash *h = NULL;
1269 READ_LOCK(&ip_conntrack_lock);
/* Loop stops as soon as a match is found (!h) or buckets run out. */
1270 for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
1271 h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
1272 struct ip_conntrack_tuple_hash *, kill, data);
/* Take a reference BEFORE dropping the lock so the caller can safely
 * use h->ctrack afterwards. */
1275 atomic_inc(&h->ctrack->ct_general.use);
1276 READ_UNLOCK(&ip_conntrack_lock);
/* Kill every conntrack matching the kill() predicate by forcing its
 * timeout handler to run now.
 * NOTE(review): extract is gapped -- the return-type line, the "void
 * *data" parameter, and the braces are elided. */
1282 ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1285 struct ip_conntrack_tuple_hash *h;
1286 unsigned int bucket = 0;
1288 while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
1289 /* Time to push up daises... */
/* Only run death_by_timeout() ourselves if we won the race to remove
 * the pending timer; otherwise the timer is already firing. */
1290 if (del_timer(&h->ctrack->timeout))
1291 death_by_timeout((unsigned long)h->ctrack);
1292 /* ... else the timer will get him soon. */
/* Drop the reference taken by get_next_corpse(). */
1294 ip_conntrack_put(h->ctrack);
1298 /* Fast function for those who don't want to parse /proc (and I don't
1300 /* Reversing the socket's dst/src point of view gives us the reply
/* SO_ORIGINAL_DST getsockopt handler: look up the conntrack whose reply
 * tuple matches this TCP socket's 4-tuple and return the pre-NAT
 * original destination to user space.
 * NOTE(review): extract is gapped -- the return-type line, braces, and
 * several returns (-EINVAL on short len, -EFAULT on copy failure, 0 on
 * success, -ENOENT on lookup miss) are elided. */
1303 getorigdst(struct sock *sk, int optval, void *user, int *len)
1305 struct inet_opt *inet = inet_sk(sk);
1306 struct ip_conntrack_tuple_hash *h;
1307 struct ip_conntrack_tuple tuple;
/* Build the reply-direction tuple from the socket's own addressing. */
1309 IP_CT_TUPLE_U_BLANK(&tuple);
1310 tuple.src.ip = inet->rcv_saddr;
1311 tuple.src.u.tcp.port = inet->sport;
1312 tuple.dst.ip = inet->daddr;
1313 tuple.dst.u.tcp.port = inet->dport;
1314 tuple.dst.protonum = IPPROTO_TCP;
1316 /* We only do TCP at the moment: is there a better way? */
1317 if (strcmp(sk->sk_prot->name, "TCP")) {
1318 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1319 return -ENOPROTOOPT;
1322 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1323 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1324 *len, sizeof(struct sockaddr_in));
1328 h = ip_conntrack_find_get(&tuple, NULL);
1330 struct sockaddr_in sin;
/* NOTE(review): sin is a stack struct and only family/port/addr are
 * assigned before copy_to_user() of sizeof(sin) -- the sin_zero
 * padding is copied uninitialized, leaking kernel stack bytes to
 * user space. Should be zeroed (memset) first; this was fixed in
 * later upstream kernels. */
1332 sin.sin_family = AF_INET;
1333 sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1334 .tuple.dst.u.tcp.port;
1335 sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1338 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1339 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
/* Release the reference taken by ip_conntrack_find_get(). */
1340 ip_conntrack_put(h->ctrack);
1341 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1346 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1347 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1348 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
/* Netfilter sockopt registration exposing SO_ORIGINAL_DST (a single
 * getsockopt value) backed by getorigdst().
 * NOTE(review): extract is gapped -- the .pf and .get member
 * initializers and the closing "};" are elided. */
1352 static struct nf_sockopt_ops so_getorigdst = {
1354 .get_optmin = SO_ORIGINAL_DST,
1355 .get_optmax = SO_ORIGINAL_DST+1,
1359 static int kill_all(const struct ip_conntrack *i, void *data)
1364 /* Mishearing the voices in his head, our hero wonders how he's
1365 supposed to kill the mall. */
/* Module teardown: detach the REJECT hook, flush every conntrack, and
 * free the hash table, slab cache, and sockopt registration.
 * NOTE(review): extract is gapped -- the opening brace, the
 * "i_see_dead_people:" label this function loops back to, and the
 * packet-quiescence wait described by the comment below are elided. */
1366 void ip_conntrack_cleanup(void)
1368 ip_ct_attach = NULL;
1369 /* This makes sure all current packets have passed through
1370 netfilter framework. Roll on, two-stage module
/* Retry the flush until no conntracks remain: entries can be created
 * or still dying while the first sweep runs. */
1375 ip_ct_selective_cleanup(kill_all, NULL);
1376 if (atomic_read(&ip_conntrack_count) != 0) {
1378 goto i_see_dead_people;
1381 kmem_cache_destroy(ip_conntrack_cachep);
1382 vfree(ip_conntrack_hash);
1383 nf_unregister_sockopt(&so_getorigdst);
/* Module parameter: explicit hash-table bucket count. Left at 0 (the
 * default), ip_conntrack_init() sizes the table from physical memory
 * instead. */
1386 static int hashsize;
1387 MODULE_PARM(hashsize, "i");
/* Module init: size and allocate the conntrack hash table, create the
 * conntrack slab cache, register the builtin protocol trackers and the
 * SO_ORIGINAL_DST sockopt, and set up the static "untracked" conntrack.
 * NOTE(review): extract is gapped -- the opening brace, local
 * declarations (ret, i), several if/else lines, "return ret;" paths,
 * and the err_free_hash/err_unreg_sockopt label lines are elided. */
1389 int __init ip_conntrack_init(void)
1394 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1395 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
/* A nonzero "hashsize" module parameter overrides the heuristic. */
1397 ip_conntrack_htable_size = hashsize;
1399 ip_conntrack_htable_size
1400 = (((num_physpages << PAGE_SHIFT) / 16384)
1401 / sizeof(struct list_head));
/* Clamp the auto-sized table to [16, 8192] buckets. */
1402 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1403 ip_conntrack_htable_size = 8192;
1404 if (ip_conntrack_htable_size < 16)
1405 ip_conntrack_htable_size = 16;
/* Cap total conntracks at 8 per bucket. */
1407 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1409 printk("ip_conntrack version %s (%u buckets, %d max)"
1410 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1411 ip_conntrack_htable_size, ip_conntrack_max,
1412 sizeof(struct ip_conntrack));
1414 ret = nf_register_sockopt(&so_getorigdst);
1416 printk(KERN_ERR "Unable to register netfilter socket option\n");
/* vmalloc, not kmalloc: the table can be far larger than a page. */
1420 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1421 * ip_conntrack_htable_size);
1422 if (!ip_conntrack_hash) {
1423 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1424 goto err_unreg_sockopt;
1427 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1428 sizeof(struct ip_conntrack), 0,
1429 SLAB_HWCACHE_ALIGN, NULL, NULL);
1430 if (!ip_conntrack_cachep) {
1431 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1434 /* Don't NEED lock here, but good form anyway. */
1435 WRITE_LOCK(&ip_conntrack_lock);
1436 /* Sew in builtin protocols. */
1437 list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1438 list_append(&protocol_list, &ip_conntrack_protocol_udp);
1439 list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1440 WRITE_UNLOCK(&ip_conntrack_lock);
1442 for (i = 0; i < ip_conntrack_htable_size; i++)
1443 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1445 /* For use by ipt_REJECT */
1446 ip_ct_attach = ip_conntrack_attach;
1448 /* Set up fake conntrack:
1449 - to never be deleted, not in any hashes */
/* The permanent reference (use = 1) is never dropped, so the untracked
 * conntrack can never be freed. */
1450 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1451 /* - and look it like as a confirmed connection */
1452 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1453 /* - and prepare the ctinfo field for REJECT & NAT. */
1454 ip_conntrack_untracked.infos[IP_CT_NEW].master =
1455 ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1456 ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1457 &ip_conntrack_untracked.ct_general;
/* Error unwind: free the hash, then unregister the sockopt (labels for
 * these paths are elided from this extract). */
1462 vfree(ip_conntrack_hash);
1464 nf_unregister_sockopt(&so_getorigdst);