1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
32 #include <linux/stddef.h>
33 #include <linux/sysctl.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/jhash.h>
37 /* For ERR_PTR(). Yeah, I know... --RR */
40 /* This rwlock protects the main hash table, protocol/helper/expected
41 registrations, conntrack timers*/
42 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
43 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
45 #include <linux/netfilter_ipv4/ip_conntrack.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
47 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
48 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
49 #include <linux/netfilter_ipv4/listhelp.h>
51 #define IP_CONNTRACK_VERSION "2.1"
56 #define DEBUGP(format, args...)
/* Global conntrack state.
 * ip_conntrack_lock protects the hash table and the registration lists
 * (see the header comment above); the expect_tuple lock additionally
 * guards expectation-tuple reads (see expect_cmp below). */
59 DECLARE_RWLOCK(ip_conntrack_lock);
60 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* Optional destruction hook, called from destroy_conntrack when set
 * (registered by another module, e.g. NAT — set externally). */
62 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
/* Global list of pending expectations and of registered protocols. */
63 LIST_HEAD(ip_conntrack_expect_list);
64 LIST_HEAD(protocol_list);
/* Registered conntrack helpers; file-private (see ip_ct_find_helper). */
65 static LIST_HEAD(helpers);
/* Hash table size; 0 until initialized elsewhere — TODO confirm init site
 * is outside this chunk. */
66 unsigned int ip_conntrack_htable_size = 0;
/* Live conntrack entry count, bounded by ip_conntrack_max (see
 * init_conntrack). */
68 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
69 struct list_head *ip_conntrack_hash;
/* Slab cache used to allocate/free struct ip_conntrack. */
70 static kmem_cache_t *ip_conntrack_cachep;
/* Template entry for packets deliberately exempted from tracking. */
71 struct ip_conntrack ip_conntrack_untracked;
/* Fallback protocol handler, defined elsewhere. */
73 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
/* LIST_FIND callback: true when a registered protocol entry matches the
 * wanted protocol number. */
75 static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
78 return protocol == curr->proto;
/* Look up the protocol handler for a protocol number; falls back to the
 * generic handler when none is registered.
 * Caller must hold ip_conntrack_lock for reading (asserted below). */
81 struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
83 struct ip_conntrack_protocol *p;
85 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
86 p = LIST_FIND(&protocol_list, proto_cmpfn,
87 struct ip_conntrack_protocol *, protocol);
/* Not found: fall back to the generic protocol handler. */
89 p = &ip_conntrack_generic_protocol;
/* Locking wrapper around __ip_ct_find_proto: takes the read lock for the
 * duration of the lookup. */
94 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
96 struct ip_conntrack_protocol *p;
98 READ_LOCK(&ip_conntrack_lock);
99 p = __ip_ct_find_proto(protocol);
100 READ_UNLOCK(&ip_conntrack_lock);
/* Drop a reference on a conntrack entry via the generic nf_conntrack
 * refcounting (which may invoke destroy_conntrack when it hits zero —
 * NOTE(review): final-drop behavior is in nf_conntrack_put, not visible
 * here). */
105 ip_conntrack_put(struct ip_conntrack *ct)
108 IP_NF_ASSERT(ct->infos[0].master);
109 /* nf_conntrack_put wants to go via an info struct, so feed it
   the first one. */
111 nf_conntrack_put(&ct->infos[0]);
/* Random seed for the conntrack hash, initialized lazily in
 * init_conntrack so the hash is unpredictable to attackers. */
114 static int ip_conntrack_hash_rnd_initted;
115 static unsigned int ip_conntrack_hash_rnd;
/* Hash a tuple into a bucket index using jhash over the addresses,
 * the combined src/dst port words and the protocol number. */
118 hash_conntrack(const struct ip_conntrack_tuple *tuple)
123 return (jhash_3words(tuple->src.ip,
124 (tuple->dst.ip ^ tuple->dst.protonum),
125 (tuple->src.u.all | (tuple->dst.u.all << 16)),
126 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
/* Fill in a conntrack tuple from an IP header plus the protocol-specific
 * part of the packet (via protocol->pkt_to_tuple).
 * Returns the pkt_to_tuple result; non-head fragments are rejected
 * since they carry no transport header. */
130 get_tuple(const struct iphdr *iph,
131 const struct sk_buff *skb,
132 unsigned int dataoff,
133 struct ip_conntrack_tuple *tuple,
134 const struct ip_conntrack_protocol *protocol)
/* Never happen: the caller filters fragments (see ip_conntrack_in). */
137 if (iph->frag_off & htons(IP_OFFSET)) {
138 printk("ip_conntrack_core: Frag of proto %u.\n",
143 tuple->src.ip = iph->saddr;
144 tuple->dst.ip = iph->daddr;
145 tuple->dst.protonum = iph->protocol;
/* Clear the port union so protocols that use fewer bytes compare clean. */
146 tuple->src.u.all = tuple->dst.u.all = 0;
148 return protocol->pkt_to_tuple(skb, dataoff, tuple);
/* Build the reply-direction tuple for `orig`: swap src/dst addresses,
 * keep the protocol number, and let the protocol invert the per-protocol
 * parts (ports, ICMP ids, ...). Returns protocol->invert_tuple result. */
152 invert_tuple(struct ip_conntrack_tuple *inverse,
153 const struct ip_conntrack_tuple *orig,
154 const struct ip_conntrack_protocol *protocol)
156 inverse->src.ip = orig->dst.ip;
157 inverse->dst.ip = orig->src.ip;
158 inverse->dst.protonum = orig->dst.protonum;
/* Zero the union first so unused bytes compare equal later. */
160 inverse->src.u.all = inverse->dst.u.all = 0;
162 return protocol->invert_tuple(inverse, orig);
166 /* ip_conntrack_expect helper functions */
168 /* Compare tuple parts depending on mask. */
/* LIST_FIND callback over ip_conntrack_expect_list; requires the
 * expect-tuple lock because it reads i->tuple/i->mask. */
169 static inline int expect_cmp(const struct ip_conntrack_expect *i,
170 const struct ip_conntrack_tuple *tuple)
172 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
173 return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
/* Final teardown of an expectation: only legal once its refcount has hit
 * zero and its timer is no longer pending (both asserted). */
177 destroy_expect(struct ip_conntrack_expect *exp)
179 DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
180 IP_NF_ASSERT(atomic_read(&exp->use) == 0);
181 IP_NF_ASSERT(!timer_pending(&exp->timeout));
/* Drop a reference on an expectation; the last put triggers destruction
 * (destroy path is in the lines elided from this chunk). */
186 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
190 if (atomic_dec_and_test(&exp->use)) {
191 /* usage count dropped to zero: destroy the expectation */
/* Find an expectation matching `tuple` (mask compare via expect_cmp).
 * Caller must hold BOTH read locks, since the list and the tuples in it
 * are guarded separately. Does not take a reference. */
196 static inline struct ip_conntrack_expect *
197 __ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
199 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
200 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
201 return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
202 struct ip_conntrack_expect *, tuple);
205 /* Find an expectation corresponding to a tuple. */
/* Locked lookup wrapper: on a hit, bumps exp->use so the caller owns a
 * reference (release with ip_conntrack_expect_put). */
206 struct ip_conntrack_expect *
207 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
209 struct ip_conntrack_expect *exp;
211 READ_LOCK(&ip_conntrack_lock);
212 READ_LOCK(&ip_conntrack_expect_tuple_lock);
213 exp = __ip_ct_expect_find(tuple);
/* Grab the reference while still under the locks. */
215 atomic_inc(&exp->use);
216 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
217 READ_UNLOCK(&ip_conntrack_lock);
222 /* remove one specific expectation from all lists and drop refcount,
223 * does _NOT_ delete the timer. */
224 static void __unexpect_related(struct ip_conntrack_expect *expect)
226 DEBUGP("unexpect_related(%p)\n", expect);
227 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
229 /* we're not allowed to unexpect a confirmed expectation! */
230 IP_NF_ASSERT(!expect->sibling);
232 /* delete from global and local lists */
233 list_del(&expect->list);
234 list_del(&expect->expected_list);
236 /* decrement expect-count of master conntrack */
237 if (expect->expectant)
238 expect->expectant->expecting--;
/* Drop the list's reference; may free the expectation. */
240 ip_conntrack_expect_put(expect);
243 /* remove one specific expectation from all lists, drop refcount
245 * This function can _NOT_ be called for confirmed expects! */
246 static void unexpect_related(struct ip_conntrack_expect *expect)
248 IP_NF_ASSERT(expect->expectant);
249 IP_NF_ASSERT(expect->expectant->helper);
250 /* if we are supposed to have a timer, but we can't delete
251 * it: race condition. __unexpect_related will
252 * be called by timeout function */
253 if (expect->expectant->helper->timeout
254 && !del_timer(&expect->timeout))
/* Timer was deleted (or none expected): unlink and unref ourselves. */
257 __unexpect_related(expect);
260 /* delete all unconfirmed expectations for this conntrack */
/* Walk ct->sibling_list safely (entries are removed while iterating);
 * confirmed expectations (exp->sibling set) are only orphaned — their
 * expectant back-pointer is cleared — while unconfirmed ones are fully
 * removed via unexpect_related. The drop_refcount parameter's effect is
 * in lines elided from this chunk — TODO confirm. */
261 static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
263 struct list_head *exp_entry, *next;
264 struct ip_conntrack_expect *exp;
266 DEBUGP("remove_expectations(%p)\n", ct);
268 list_for_each_safe(exp_entry, next, &ct->sibling_list) {
269 exp = list_entry(exp_entry, struct ip_conntrack_expect,
272 /* we skip established expectations, as we want to delete
273 * the un-established ones only */
275 DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
277 /* Indicate that this expectations parent is dead */
278 ip_conntrack_put(exp->expectant);
279 exp->expectant = NULL;
284 IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
285 IP_NF_ASSERT(exp->expectant == ct);
287 /* delete expectation from global and private lists */
288 unexpect_related(exp);
/* Unlink a conntrack from both hash chains (original and reply
 * directions) and drop its pending expectations.
 * Caller must hold ip_conntrack_lock for writing (asserted). */
293 clean_from_lists(struct ip_conntrack *ct)
297 DEBUGP("clean_from_lists(%p)\n", ct);
298 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
300 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
301 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
302 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
303 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
305 /* Destroy all un-established, pending expectations */
306 remove_expectations(ct, 1);
/* Final destructor, installed as ct_general.destroy in init_conntrack
 * and reached when the last reference is dropped (use count asserted 0).
 * Runs protocol/module destroy hooks, detaches expectations and the
 * master link, then returns the entry to the slab cache. */
310 destroy_conntrack(struct nf_conntrack *nfct)
312 struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
313 struct ip_conntrack_protocol *proto;
315 DEBUGP("destroy_conntrack(%p)\n", ct);
316 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
317 IP_NF_ASSERT(!timer_pending(&ct->timeout));
319 /* To make sure we don't get any weird locking issues here:
320 * destroy_conntrack() MUST NOT be called with a write lock
321 * to ip_conntrack_lock!!! -HW */
322 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
323 if (proto && proto->destroy)
/* Notify external module (e.g. NAT) that registered a destroy hook. */
326 if (ip_conntrack_destroyed)
327 ip_conntrack_destroyed(ct);
329 WRITE_LOCK(&ip_conntrack_lock);
330 /* Make sure don't leave any orphaned expectations lying around */
332 remove_expectations(ct, 1);
334 /* Delete our master expectation */
336 if (ct->master->expectant) {
337 /* can't call __unexpect_related here,
338 * since it would screw up expect_list */
339 list_del(&ct->master->expected_list);
/* Remember the master so we can drop its reference after unlocking. */
340 master = ct->master->expectant;
344 WRITE_UNLOCK(&ip_conntrack_lock);
/* Drop master reference outside the lock (see locking comment above). */
347 ip_conntrack_put(master);
349 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
350 kmem_cache_free(ip_conntrack_cachep, ct);
351 atomic_dec(&ip_conntrack_count);
/* Timer callback: a conntrack's timeout expired. Unlink it from the
 * hash/expect lists under the write lock, then drop the hash table's
 * reference (which may trigger destroy_conntrack). */
354 static void death_by_timeout(unsigned long ul_conntrack)
356 struct ip_conntrack *ct = (void *)ul_conntrack;
358 WRITE_LOCK(&ip_conntrack_lock);
359 clean_from_lists(ct);
360 WRITE_UNLOCK(&ip_conntrack_lock);
361 ip_conntrack_put(ct);
/* Hash-chain compare callback: exact tuple match, optionally skipping
 * one conntrack (used e.g. when checking whether a reply tuple would
 * clash with anything other than ourselves). */
365 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
366 const struct ip_conntrack_tuple *tuple,
367 const struct ip_conntrack *ignored_conntrack)
369 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
370 return i->ctrack != ignored_conntrack
371 && ip_ct_tuple_equal(tuple, &i->tuple);
/* Look up a tuple in its hash chain; caller holds the read lock and
 * gets a borrowed (unreferenced) pointer, or NULL if absent. */
374 static struct ip_conntrack_tuple_hash *
375 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
376 const struct ip_conntrack *ignored_conntrack)
378 struct ip_conntrack_tuple_hash *h;
379 unsigned int hash = hash_conntrack(tuple);
381 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
382 h = LIST_FIND(&ip_conntrack_hash[hash],
384 struct ip_conntrack_tuple_hash *,
385 tuple, ignored_conntrack);
389 /* Find a connection corresponding to a tuple. */
/* Locked lookup: on a hit, takes a reference on the owning conntrack
 * while still under the read lock so the entry cannot vanish. Caller
 * releases with ip_conntrack_put. */
390 struct ip_conntrack_tuple_hash *
391 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
392 const struct ip_conntrack *ignored_conntrack)
394 struct ip_conntrack_tuple_hash *h;
396 READ_LOCK(&ip_conntrack_lock);
397 h = __ip_conntrack_find(tuple, ignored_conntrack);
399 atomic_inc(&h->ctrack->ct_general.use);
400 READ_UNLOCK(&ip_conntrack_lock);
/* Recover the conntrack and ctinfo from an nf_ct_info pointer: the info
 * structs live in an array inside the conntrack, so the array offset IS
 * the ip_conntrack_info value. */
405 static inline struct ip_conntrack *
406 __ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
408 struct ip_conntrack *ct
409 = (struct ip_conntrack *)nfct->master;
411 /* ctinfo is the index of the nfct inside the conntrack */
412 *ctinfo = nfct - ct->infos;
413 IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
417 /* Return conntrack and conntrack_info given skb->nfct->master */
418 struct ip_conntrack *
419 ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
422 return __ip_conntrack_get(skb->nfct, ctinfo);
426 /* Confirm a connection given skb->nfct; places it in hash table */
/* Insert an as-yet-unconfirmed conntrack into both hash chains unless a
 * racing packet already inserted an equivalent entry; on success, starts
 * the timeout timer (relative to now), takes the hash table's reference
 * and sets IPS_CONFIRMED. The failure path's return value is in lines
 * elided from this chunk — TODO confirm. */
428 __ip_conntrack_confirm(struct nf_ct_info *nfct)
430 unsigned int hash, repl_hash;
431 struct ip_conntrack *ct;
432 enum ip_conntrack_info ctinfo;
434 ct = __ip_conntrack_get(nfct, &ctinfo);
436 /* ipt_REJECT uses ip_conntrack_attach to attach related
437 ICMP/TCP RST packets in other direction. Actual packet
438 which created connection will be IP_CT_NEW or for an
439 expected connection, IP_CT_RELATED. */
440 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
443 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
444 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
446 /* We're not in hash table, and we refuse to set up related
447 connections for unconfirmed conns. But packet copies and
448 REJECT will give spurious warnings here. */
449 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
451 /* No external references means no one else could have
   confirmed us. */
453 IP_NF_ASSERT(!is_confirmed(ct));
454 DEBUGP("Confirming conntrack %p\n", ct);
456 WRITE_LOCK(&ip_conntrack_lock);
457 /* See if there's one in the list already, including reverse:
458 NAT could have grabbed it without realizing, since we're
459 not in the hash. If there is, we lost race. */
460 if (!LIST_FIND(&ip_conntrack_hash[hash],
462 struct ip_conntrack_tuple_hash *,
463 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
464 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
466 struct ip_conntrack_tuple_hash *,
467 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
468 list_prepend(&ip_conntrack_hash[hash],
469 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
470 list_prepend(&ip_conntrack_hash[repl_hash],
471 &ct->tuplehash[IP_CT_DIR_REPLY]);
472 /* Timer relative to confirmation time, not original
473 setting time, otherwise we'd get timer wrap in
474 weird delay cases. */
475 ct->timeout.expires += jiffies;
476 add_timer(&ct->timeout);
/* Hash table now holds its own reference on the conntrack. */
477 atomic_inc(&ct->ct_general.use);
478 set_bit(IPS_CONFIRMED_BIT, &ct->status);
479 WRITE_UNLOCK(&ip_conntrack_lock);
/* Lost the race: somebody inserted an equivalent entry first. */
483 WRITE_UNLOCK(&ip_conntrack_lock);
487 /* Returns true if a connection corresponding to the tuple (required
 * for NAT reply-tuple uniqueness checks); `ignored_conntrack` lets the
 * caller exclude itself from the search. */
490 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
491 const struct ip_conntrack *ignored_conntrack)
493 struct ip_conntrack_tuple_hash *h;
495 READ_LOCK(&ip_conntrack_lock);
496 h = __ip_conntrack_find(tuple, ignored_conntrack);
497 READ_UNLOCK(&ip_conntrack_lock);
502 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
/* Handle an ICMP error packet: extract the embedded (offending) packet's
 * tuple, look up the connection it refers to, and attach skb->nfct with
 * IP_CT_RELATED (+IS_REPLY depending on the direction found).
 * NOTE(review): the two DIRECTION tests near the end sit in different
 * branches (normal vs. locally-generated lookup) whose if/else structure
 * is in lines elided from this chunk. */
503 struct ip_conntrack *
504 icmp_error_track(struct sk_buff *skb,
505 enum ip_conntrack_info *ctinfo,
506 unsigned int hooknum)
508 struct ip_conntrack_tuple innertuple, origtuple;
513 struct ip_conntrack_protocol *innerproto;
514 struct ip_conntrack_tuple_hash *h;
517 IP_NF_ASSERT(skb->nfct == NULL);
519 /* Not enough header? */
520 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
/* Only these ICMP types embed a copy of the offending packet. */
523 if (inside.icmp.type != ICMP_DEST_UNREACH
524 && inside.icmp.type != ICMP_SOURCE_QUENCH
525 && inside.icmp.type != ICMP_TIME_EXCEEDED
526 && inside.icmp.type != ICMP_PARAMETERPROB
527 && inside.icmp.type != ICMP_REDIRECT)
530 /* Ignore ICMP's containing fragments (shouldn't happen) */
531 if (inside.ip.frag_off & htons(IP_OFFSET)) {
532 DEBUGP("icmp_error_track: fragment of proto %u\n",
537 innerproto = ip_ct_find_proto(inside.ip.protocol);
/* Offset of the embedded packet's transport header within skb. */
538 dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
539 /* Are they talking about one of our connections? */
540 if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
541 DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
545 /* Ordinarily, we'd expect the inverted tupleproto, but it's
546 been preserved inside the ICMP. */
547 if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
548 DEBUGP("icmp_error_track: Can't invert tuple\n");
552 *ctinfo = IP_CT_RELATED;
554 h = ip_conntrack_find_get(&innertuple, NULL);
556 /* Locally generated ICMPs will match inverted if they
557 haven't been SNAT'ed yet */
558 /* FIXME: NAT code has to handle half-done double NAT --RR */
559 if (hooknum == NF_IP_LOCAL_OUT)
560 h = ip_conntrack_find_get(&origtuple, NULL);
563 DEBUGP("icmp_error_track: no match\n");
566 /* Reverse direction from that found */
567 if (DIRECTION(h) != IP_CT_DIR_REPLY)
568 *ctinfo += IP_CT_IS_REPLY;
570 if (DIRECTION(h) == IP_CT_DIR_REPLY)
571 *ctinfo += IP_CT_IS_REPLY;
574 /* Update skb to refer to this connection */
575 skb->nfct = &h->ctrack->infos[*ctinfo];
579 /* There's a small race here where we may free a just-assured
580 connection. Too bad: we're in trouble anyway. */
/* early_drop victim test: a connection is droppable while unassured. */
581 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
583 return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
/* Table-full relief: pick the oldest unassured entry in one hash chain
 * (backwards scan approximates LRU) and kill it as if its timer fired.
 * Takes a temporary reference so the victim survives the lock drop. */
586 static int early_drop(struct list_head *chain)
588 /* Traverse backwards: gives us oldest, which is roughly LRU */
589 struct ip_conntrack_tuple_hash *h;
592 READ_LOCK(&ip_conntrack_lock);
593 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
595 atomic_inc(&h->ctrack->ct_general.use);
596 READ_UNLOCK(&ip_conntrack_lock);
/* del_timer success means we own the kill; run the timeout path now. */
601 if (del_timer(&h->ctrack->timeout)) {
602 death_by_timeout((unsigned long)h->ctrack);
605 ip_conntrack_put(h->ctrack);
/* LIST_FIND callback: does this helper's tuple/mask match the reply
 * tuple? */
609 static inline int helper_cmp(const struct ip_conntrack_helper *i,
610 const struct ip_conntrack_tuple *rtuple)
612 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
/* Find the registered helper (if any) matching a tuple. */
615 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
617 return LIST_FIND(&helpers, helper_cmp,
618 struct ip_conntrack_helper *,
622 /* Allocate a new conntrack: we return -ENOMEM if classification
623 failed due to stress. Otherwise it really is unclassifiable. */
/* Build a fresh (unconfirmed) conntrack for `tuple`: seed the hash rnd
 * lazily, evict via early_drop when over ip_conntrack_max, allocate and
 * initialize both direction tuplehashes, bind an expectation/helper if
 * one matches, and bump the global count. Confirmation into the hash
 * table happens later in __ip_conntrack_confirm. */
624 static struct ip_conntrack_tuple_hash *
625 init_conntrack(const struct ip_conntrack_tuple *tuple,
626 struct ip_conntrack_protocol *protocol,
629 struct ip_conntrack *conntrack;
630 struct ip_conntrack_tuple repl_tuple;
632 struct ip_conntrack_expect *expected;
/* Rotates which chain early_drop tries first, to spread evictions. */
634 static unsigned int drop_next;
636 if (!ip_conntrack_hash_rnd_initted) {
637 get_random_bytes(&ip_conntrack_hash_rnd, 4);
638 ip_conntrack_hash_rnd_initted = 1;
641 hash = hash_conntrack(tuple);
643 if (ip_conntrack_max &&
644 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
645 /* Try dropping from random chain, or else from the
646 chain about to put into (in case they're trying to
647 bomb one hash chain). */
648 unsigned int next = (drop_next++)%ip_conntrack_htable_size;
650 if (!early_drop(&ip_conntrack_hash[next])
651 && !early_drop(&ip_conntrack_hash[hash])) {
654 "ip_conntrack: table full, dropping"
656 return ERR_PTR(-ENOMEM);
660 if (!invert_tuple(&repl_tuple, tuple, protocol)) {
661 DEBUGP("Can't invert tuple.\n");
665 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
667 DEBUGP("Can't allocate conntrack.\n");
668 return ERR_PTR(-ENOMEM);
671 memset(conntrack, 0, sizeof(*conntrack));
/* Initial reference belongs to the creator; destroy hook installed. */
672 atomic_set(&conntrack->ct_general.use, 1);
673 conntrack->ct_general.destroy = destroy_conntrack;
674 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
675 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
676 conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
677 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
678 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
679 conntrack->xid[IP_CT_DIR_REPLY] = -1;
/* Every info slot points back at this entry (see __ip_conntrack_get). */
680 for (i=0; i < IP_CT_NUMBER; i++)
681 conntrack->infos[i].master = &conntrack->ct_general;
/* Let the protocol reject packets that can't open a connection. */
683 if (!protocol->new(conntrack, skb)) {
684 kmem_cache_free(ip_conntrack_cachep, conntrack);
687 /* Don't set timer yet: wait for confirmation */
688 init_timer(&conntrack->timeout);
689 conntrack->timeout.data = (unsigned long)conntrack;
690 conntrack->timeout.function = death_by_timeout;
692 INIT_LIST_HEAD(&conntrack->sibling_list);
694 WRITE_LOCK(&ip_conntrack_lock);
695 /* Need finding and deleting of expected ONLY if we win race */
696 READ_LOCK(&ip_conntrack_expect_tuple_lock);
697 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
698 struct ip_conntrack_expect *, tuple);
699 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
701 /* If master is not in hash table yet (ie. packet hasn't left
702 this machine yet), how can other end know about expected?
703 Hence these are not the droids you are looking for (if
704 master ct never got confirmed, we'd hold a reference to it
705 and weird things would happen to future packets). */
706 if (expected && !is_confirmed(expected->expectant))
709 /* Look up the conntrack helper for master connections only */
711 conntrack->helper = ip_ct_find_helper(&repl_tuple);
713 /* If the expectation is dying, then this is a loser. */
715 && expected->expectant->helper->timeout
716 && ! del_timer(&expected->timeout))
720 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
721 conntrack, expected);
722 /* Welcome, Mr. Bond. We've been expecting you... */
723 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
724 conntrack->master = expected;
725 expected->sibling = conntrack;
726 LIST_DELETE(&ip_conntrack_expect_list, expected);
727 expected->expectant->expecting--;
/* Hold a reference on the master for the lifetime of this child. */
728 nf_conntrack_get(&master_ct(conntrack)->infos[0]);
730 atomic_inc(&ip_conntrack_count);
731 WRITE_UNLOCK(&ip_conntrack_lock);
/* Run the expectation callback outside the lock. */
733 if (expected && expected->expectfn)
734 expected->expectfn(conntrack);
735 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
738 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/* Classify a non-ICMP-error packet: derive its tuple, find or create
 * the conntrack, and compute ctinfo from direction and the SEEN_REPLY /
 * EXPECTED status bits before attaching skb->nfct. */
739 static inline struct ip_conntrack *
740 resolve_normal_ct(struct sk_buff *skb,
741 struct ip_conntrack_protocol *proto,
743 unsigned int hooknum,
744 enum ip_conntrack_info *ctinfo)
746 struct ip_conntrack_tuple tuple;
747 struct ip_conntrack_tuple_hash *h;
749 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
751 if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
754 /* look for tuple match */
755 h = ip_conntrack_find_get(&tuple, NULL);
/* Miss: create a fresh (unconfirmed) entry. */
757 h = init_conntrack(&tuple, proto, skb);
764 /* It exists; we have (non-exclusive) reference. */
765 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
766 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
767 /* Please set reply bit if this packet OK */
770 /* Once we've had two way comms, always ESTABLISHED. */
771 if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
772 DEBUGP("ip_conntrack_in: normal packet for %p\n",
774 *ctinfo = IP_CT_ESTABLISHED;
775 } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
776 DEBUGP("ip_conntrack_in: related packet for %p\n",
778 *ctinfo = IP_CT_RELATED;
780 DEBUGP("ip_conntrack_in: new packet for %p\n",
/* Attach the matching info slot to the skb (encodes ct + ctinfo). */
786 skb->nfct = &h->ctrack->infos[*ctinfo];
790 /* Netfilter hook itself. */
/* Main conntrack netfilter hook: filters fragments and broadcasts,
 * dispatches ICMP errors to icmp_error_track, resolves/creates the
 * conntrack for everything else, then runs the protocol state machine
 * and any helper; on protocol/helper rejection the skb's conntrack
 * reference is dropped and nfct cleared. */
791 unsigned int ip_conntrack_in(unsigned int hooknum,
792 struct sk_buff **pskb,
793 const struct net_device *in,
794 const struct net_device *out,
795 int (*okfn)(struct sk_buff *))
797 struct ip_conntrack *ct;
798 enum ip_conntrack_info ctinfo;
799 struct ip_conntrack_protocol *proto;
/* Non-head fragments should have been reassembled before this hook. */
804 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
805 if (net_ratelimit()) {
806 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
807 (*pskb)->nh.iph->protocol, hooknum);
812 /* FIXME: Do this right please. --RR */
813 (*pskb)->nfcache |= NFC_UNKNOWN;
815 /* Doesn't cover locally-generated broadcast, so not worth it. */
817 /* Ignore broadcast: no `connection'. */
818 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
819 printk("Broadcast packet!\n");
821 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
822 == htonl(0x000000FF)) {
823 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
824 NIPQUAD((*pskb)->nh.iph->saddr),
825 NIPQUAD((*pskb)->nh.iph->daddr),
826 (*pskb)->sk, (*pskb)->pkt_type);
830 /* Previously seen (loopback or untracked)? Ignore. */
834 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
836 /* It may be an icmp error... */
837 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
838 && icmp_error_track(*pskb, &ctinfo, hooknum))
841 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
842 /* Not valid part of a connection */
846 /* Too stressed to deal. */
849 IP_NF_ASSERT((*pskb)->nfct);
851 ret = proto->packet(ct, *pskb, ctinfo);
/* Protocol rejected the packet: detach and drop our reference. */
854 nf_conntrack_put((*pskb)->nfct);
855 (*pskb)->nfct = NULL;
859 if (ret != NF_DROP && ct->helper) {
860 ret = ct->helper->help(*pskb, ct, ctinfo);
/* Helper rejected the packet: same cleanup as above. */
863 nf_conntrack_put((*pskb)->nfct);
864 (*pskb)->nfct = NULL;
/* Record that we have now seen traffic in the reply direction. */
869 set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
/* Invert a tuple, resolving the protocol handler by protocol number. */
874 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
875 const struct ip_conntrack_tuple *orig)
877 return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
/* LIST_FIND callback: is `i` the same expectation being re-sent?
 * Matches against ct_tuple when set (i.e. the expectation's tuple was
 * changed by NAT, see ip_conntrack_change_expect), else against tuple;
 * the mask must match exactly in either case. */
880 static inline int resent_expect(const struct ip_conntrack_expect *i,
881 const struct ip_conntrack_tuple *tuple,
882 const struct ip_conntrack_tuple *mask)
884 DEBUGP("resent_expect\n");
885 DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
886 DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
887 DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
888 return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
889 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
890 && ip_ct_tuple_equal(&i->mask, mask));
893 /* Would two expected things clash? */
/* Compare i->tuple against `tuple` under the intersection of both
 * masks: if they agree on every bit both masks care about, the two
 * expectations could match the same packet and therefore clash. */
894 static inline int expect_clash(const struct ip_conntrack_expect *i,
895 const struct ip_conntrack_tuple *tuple,
896 const struct ip_conntrack_tuple *mask)
898 /* Part covered by intersection of masks must be unequal,
899 otherwise they clash */
900 struct ip_conntrack_tuple intersect_mask
901 = { { i->mask.src.ip & mask->src.ip,
902 { i->mask.src.u.all & mask->src.u.all } },
903 { i->mask.dst.ip & mask->dst.ip,
904 { i->mask.dst.u.all & mask->dst.u.all },
905 i->mask.dst.protonum & mask->dst.protonum } };
907 return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
/* Public locked wrapper around unexpect_related. */
910 inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
912 WRITE_LOCK(&ip_conntrack_lock);
913 unexpect_related(expect);
914 WRITE_UNLOCK(&ip_conntrack_lock);
/* Timer callback for an expectation's timeout: unlink and unref it.
 * Calls the __ variant directly since the timer has, by definition,
 * already fired (see the race note in unexpect_related). */
917 static void expectation_timed_out(unsigned long ul_expect)
919 struct ip_conntrack_expect *expect = (void *) ul_expect;
921 DEBUGP("expectation %p timed out\n", expect);
922 WRITE_LOCK(&ip_conntrack_lock);
923 __unexpect_related(expect);
924 WRITE_UNLOCK(&ip_conntrack_lock);
/* Allocate a zeroed expectation (GFP_ATOMIC — callable from packet
 * context). Returns NULL on allocation failure (cleanup path elided
 * from this chunk). */
927 struct ip_conntrack_expect *
928 ip_conntrack_expect_alloc(void)
930 struct ip_conntrack_expect *new;
932 new = (struct ip_conntrack_expect *)
933 kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
935 DEBUGP("expect_related: OOM allocating expect\n");
939 /* tuple_cmp compares the whole union, so we must initialize cleanly */
940 memset(new, 0, sizeof(struct ip_conntrack_expect));
/* Link a freshly built expectation to its master conntrack: add it to
 * the master's sibling list and the global expect list, start its
 * timeout timer when the helper defines one, and bump the master's
 * expecting count. Caller is expected to hold the write lock (all
 * callers in this file do — TODO confirm no others). */
946 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
947 struct ip_conntrack *related_to)
949 DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
950 new->expectant = related_to;
/* Initial reference is owned by the lists we add it to. */
952 atomic_set(&new->use, 1);
954 /* add to expected list for this connection */
955 list_add_tail(&new->expected_list, &related_to->sibling_list);
956 /* add to global list of expectations */
957 list_prepend(&ip_conntrack_expect_list, &new->list);
958 /* add and start timer if required */
959 if (related_to->helper->timeout) {
960 init_timer(&new->timeout);
961 new->timeout.data = (unsigned long)new;
962 new->timeout.function = expectation_timed_out;
963 new->timeout.expires = jiffies +
964 related_to->helper->timeout * HZ;
965 add_timer(&new->timeout);
967 related_to->expecting++;
970 /* Add a related connection. */
/* Register `expect` as a related-connection expectation of `related_to`.
 * Handles three special cases before inserting: (1) the same expectation
 * re-sent (just refresh its timer), (2) the helper's max_expected limit
 * reached (either refuse, or evict the oldest unconfirmed expectation
 * when the helper allows reuse), (3) a clash with an existing
 * expectation (refuse). */
971 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
972 struct ip_conntrack *related_to)
974 struct ip_conntrack_expect *old;
977 WRITE_LOCK(&ip_conntrack_lock);
978 /* Because of the write lock, no reader can walk the lists,
979 * so there is no need to use the tuple lock too */
981 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
982 DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
983 DEBUGP("mask: "); DUMP_TUPLE_RAW(&expect->mask);
985 old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
986 struct ip_conntrack_expect *, &expect->tuple,
989 /* Helper private data may contain offsets but no pointers
990 pointing into the payload - otherwise we should have to copy
991 the data filled out by the helper over the old one */
992 DEBUGP("expect_related: resent packet\n");
993 if (related_to->helper->timeout) {
994 if (!del_timer(&old->timeout)) {
995 /* expectation is dying. Fall through */
/* Timer stopped: push the deadline out again. */
998 old->timeout.expires = jiffies +
999 related_to->helper->timeout * HZ;
1000 add_timer(&old->timeout);
1004 WRITE_UNLOCK(&ip_conntrack_lock);
1008 } else if (related_to->helper->max_expected &&
1009 related_to->expecting >= related_to->helper->max_expected) {
/* Over the per-master limit: refuse unless the helper opted in
 * to evicting an old expectation instead. */
1011 if (!(related_to->helper->flags &
1012 IP_CT_HELPER_F_REUSE_EXPECT)) {
1013 WRITE_UNLOCK(&ip_conntrack_lock);
1014 if (net_ratelimit())
1016 "ip_conntrack: max number of expected "
1017 "connections %i of %s reached for "
1018 "%u.%u.%u.%u->%u.%u.%u.%u\n",
1019 related_to->helper->max_expected,
1020 related_to->helper->name,
1021 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1022 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1026 DEBUGP("ip_conntrack: max number of expected "
1027 "connections %i of %s reached for "
1028 "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1029 related_to->helper->max_expected,
1030 related_to->helper->name,
1031 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1032 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1034 /* choose the oldest expectation to evict */
1035 list_for_each_entry(old, &related_to->sibling_list,
1037 if (old->sibling == NULL)
1040 /* We cannot fail since related_to->expecting is the number
1041 * of unconfirmed expectations */
1042 IP_NF_ASSERT(old && old->sibling == NULL);
1044 /* newnat14 does not reuse the real allocated memory
1045 * structures but rather unexpects the old and
1046 * allocates a new. unexpect_related will decrement
1047 * related_to->expecting.
1049 unexpect_related(old);
1051 } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1052 struct ip_conntrack_expect *, &expect->tuple,
1054 WRITE_UNLOCK(&ip_conntrack_lock);
1055 DEBUGP("expect_related: busy!\n");
/* Normal path: link the new expectation under the write lock. */
1061 out: ip_conntrack_expect_insert(expect, related_to);
1063 WRITE_UNLOCK(&ip_conntrack_lock);
1068 /* Change tuple in an existing expectation */
/* Used by NAT to retarget an expectation's tuple. First change saves
 * the original tuple in ct_tuple (so resent_expect can still recognize
 * it) unless the new tuple would clash, in which case NAT is told to
 * pick another. Caller holds the conntrack read lock; the expect-tuple
 * write lock is taken here because readers compare tuples under it. */
1069 int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1070 struct ip_conntrack_tuple *newtuple)
1074 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1075 WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1076 DEBUGP("change_expect:\n");
1077 DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1078 DEBUGP("exp mask: "); DUMP_TUPLE_RAW(&expect->mask);
1079 DEBUGP("newtuple: "); DUMP_TUPLE_RAW(newtuple);
1080 if (expect->ct_tuple.dst.protonum == 0) {
1081 /* Never seen before */
1082 DEBUGP("change expect: never seen before\n");
1083 if (!ip_ct_tuple_mask_cmp(&expect->tuple, newtuple, &expect->mask)
1084 && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1085 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1086 /* Force NAT to find an unused tuple */
/* Save the original tuple before overwriting it. */
1089 memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1090 memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
/* ct_tuple already set: this expectation was changed before. */
1095 DEBUGP("change expect: resent packet\n");
1096 if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1099 /* Force NAT to choose again the same port */
1103 WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1108 /* Alter reply tuple (maybe alter helper). If it's already taken,
1109 return 0 and don't do alteration. */
1110 int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1111 const struct ip_conntrack_tuple *newreply)
1113 WRITE_LOCK(&ip_conntrack_lock);
/* Refuse if another conntrack already owns the new reply tuple. */
1114 if (__ip_conntrack_find(newreply, conntrack)) {
1115 WRITE_UNLOCK(&ip_conntrack_lock);
1118 /* Should be unconfirmed, so not in hash table yet */
1119 IP_NF_ASSERT(!is_confirmed(conntrack));
1121 DEBUGP("Altering reply tuple of %p to ", conntrack);
1122 DUMP_TUPLE(newreply);
1124 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
/* Only re-resolve the helper for master (non-expected) connections. */
1125 if (!conntrack->master && list_empty(&conntrack->sibling_list))
1126 conntrack->helper = ip_ct_find_helper(newreply);
1127 WRITE_UNLOCK(&ip_conntrack_lock);
/* Add a helper to the global list (prepended, so newest wins ties in
 * ip_ct_find_helper's list scan). */
1132 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1134 WRITE_LOCK(&ip_conntrack_lock);
1135 list_prepend(&helpers, me);
1136 WRITE_UNLOCK(&ip_conntrack_lock);
/* Per-entry callback for helper unregistration: strip the helper (and
 * its pending expectations) from any conntrack still using it. */
1141 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1142 const struct ip_conntrack_helper *me)
1144 if (i->ctrack->helper == me) {
1145 /* Get rid of any expected. */
1146 remove_expectations(i->ctrack, 0);
1147 /* And *then* set helper to NULL */
1148 i->ctrack->helper = NULL;
1153 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1157 /* Need write lock here, to delete helper. */
1158 WRITE_LOCK(&ip_conntrack_lock);
1159 LIST_DELETE(&helpers, me);
1161 /* Get rid of expecteds, set helpers to NULL. */
1162 for (i = 0; i < ip_conntrack_htable_size; i++)
1163 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1164 struct ip_conntrack_tuple_hash *, me);
1165 WRITE_UNLOCK(&ip_conntrack_lock);
1167 /* Someone could be still looking at the helper in a bh. */
1171 static inline void ct_add_counters(struct ip_conntrack *ct,
1172 enum ip_conntrack_info ctinfo,
1173 const struct sk_buff *skb)
1175 #ifdef CONFIG_IP_NF_CT_ACCT
1177 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1178 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1179 ntohs(skb->nh.iph->tot_len);
1184 /* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
1185 void ip_ct_refresh_acct(struct ip_conntrack *ct,
1186 enum ip_conntrack_info ctinfo,
1187 const struct sk_buff *skb,
1188 unsigned long extra_jiffies)
1190 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1192 /* If not in hash table, timer will not be active yet */
1193 if (!is_confirmed(ct)) {
1194 ct->timeout.expires = extra_jiffies;
1195 ct_add_counters(ct, ctinfo, skb);
1197 WRITE_LOCK(&ip_conntrack_lock);
1198 /* Need del_timer for race avoidance (may already be dying). */
1199 if (del_timer(&ct->timeout)) {
1200 ct->timeout.expires = jiffies + extra_jiffies;
1201 add_timer(&ct->timeout);
1203 ct_add_counters(ct, ctinfo, skb);
1204 WRITE_UNLOCK(&ip_conntrack_lock);
1208 /* Returns new sk_buff, or NULL */
1210 ip_ct_gather_frags(struct sk_buff *skb)
1212 struct sock *sk = skb->sk;
1213 #ifdef CONFIG_NETFILTER_DEBUG
1214 unsigned int olddebug = skb->nf_debug;
1222 skb = ip_defrag(skb);
1232 skb_set_owner_w(skb, sk);
1236 ip_send_check(skb->nh.iph);
1237 skb->nfcache |= NFC_ALTERED;
1238 #ifdef CONFIG_NETFILTER_DEBUG
1239 /* Packet path as if nothing had happened. */
1240 skb->nf_debug = olddebug;
1245 /* Used by ipt_REJECT. */
1246 static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1248 struct ip_conntrack *ct;
1249 enum ip_conntrack_info ctinfo;
1251 ct = __ip_conntrack_get(nfct, &ctinfo);
1253 /* This ICMP is in reverse direction to the packet which
1255 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1256 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1258 ctinfo = IP_CT_RELATED;
1260 /* Attach new skbuff, and increment count */
1261 nskb->nfct = &ct->infos[ctinfo];
1262 atomic_inc(&ct->ct_general.use);
1266 do_kill(const struct ip_conntrack_tuple_hash *i,
1267 int (*kill)(const struct ip_conntrack *i, void *data),
1270 return kill(i->ctrack, data);
1273 /* Bring out ya dead! */
1274 static struct ip_conntrack_tuple_hash *
1275 get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1276 void *data, unsigned int *bucket)
1278 struct ip_conntrack_tuple_hash *h = NULL;
1280 READ_LOCK(&ip_conntrack_lock);
1281 for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
1282 h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
1283 struct ip_conntrack_tuple_hash *, kill, data);
1286 atomic_inc(&h->ctrack->ct_general.use);
1287 READ_UNLOCK(&ip_conntrack_lock);
1293 ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1296 struct ip_conntrack_tuple_hash *h;
1297 unsigned int bucket = 0;
1299 while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
1300 /* Time to push up daises... */
1301 if (del_timer(&h->ctrack->timeout))
1302 death_by_timeout((unsigned long)h->ctrack);
1303 /* ... else the timer will get him soon. */
1305 ip_conntrack_put(h->ctrack);
1309 /* Fast function for those who don't want to parse /proc (and I don't
1311 /* Reversing the socket's dst/src point of view gives us the reply
1314 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1316 struct inet_opt *inet = inet_sk(sk);
1317 struct ip_conntrack_tuple_hash *h;
1318 struct ip_conntrack_tuple tuple;
1320 IP_CT_TUPLE_U_BLANK(&tuple);
1321 tuple.src.ip = inet->rcv_saddr;
1322 tuple.src.u.tcp.port = inet->sport;
1323 tuple.dst.ip = inet->daddr;
1324 tuple.dst.u.tcp.port = inet->dport;
1325 tuple.dst.protonum = IPPROTO_TCP;
1327 /* We only do TCP at the moment: is there a better way? */
1328 if (strcmp(sk->sk_prot->name, "TCP")) {
1329 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1330 return -ENOPROTOOPT;
1333 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1334 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1335 *len, sizeof(struct sockaddr_in));
1339 h = ip_conntrack_find_get(&tuple, NULL);
1341 struct sockaddr_in sin;
1343 sin.sin_family = AF_INET;
1344 sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1345 .tuple.dst.u.tcp.port;
1346 sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1349 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1350 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1351 ip_conntrack_put(h->ctrack);
1352 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1357 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1358 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1359 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
1363 static struct nf_sockopt_ops so_getorigdst = {
1365 .get_optmin = SO_ORIGINAL_DST,
1366 .get_optmax = SO_ORIGINAL_DST+1,
/* Predicate for ip_ct_selective_cleanup(): match every entry. */
static int kill_all(const struct ip_conntrack *i, void *data)
{
	return 1;
}
1375 /* Mishearing the voices in his head, our hero wonders how he's
1376 supposed to kill the mall. */
1377 void ip_conntrack_cleanup(void)
1379 ip_ct_attach = NULL;
1380 /* This makes sure all current packets have passed through
1381 netfilter framework. Roll on, two-stage module
1386 ip_ct_selective_cleanup(kill_all, NULL);
1387 if (atomic_read(&ip_conntrack_count) != 0) {
1389 goto i_see_dead_people;
1392 kmem_cache_destroy(ip_conntrack_cachep);
1393 vfree(ip_conntrack_hash);
1394 nf_unregister_sockopt(&so_getorigdst);
1397 static int hashsize;
1398 MODULE_PARM(hashsize, "i");
1400 int __init ip_conntrack_init(void)
1405 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1406 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1408 ip_conntrack_htable_size = hashsize;
1410 ip_conntrack_htable_size
1411 = (((num_physpages << PAGE_SHIFT) / 16384)
1412 / sizeof(struct list_head));
1413 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1414 ip_conntrack_htable_size = 8192;
1415 if (ip_conntrack_htable_size < 16)
1416 ip_conntrack_htable_size = 16;
1418 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1420 printk("ip_conntrack version %s (%u buckets, %d max)"
1421 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1422 ip_conntrack_htable_size, ip_conntrack_max,
1423 sizeof(struct ip_conntrack));
1425 ret = nf_register_sockopt(&so_getorigdst);
1427 printk(KERN_ERR "Unable to register netfilter socket option\n");
1431 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1432 * ip_conntrack_htable_size);
1433 if (!ip_conntrack_hash) {
1434 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1435 goto err_unreg_sockopt;
1438 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1439 sizeof(struct ip_conntrack), 0,
1440 SLAB_HWCACHE_ALIGN, NULL, NULL);
1441 if (!ip_conntrack_cachep) {
1442 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1445 /* Don't NEED lock here, but good form anyway. */
1446 WRITE_LOCK(&ip_conntrack_lock);
1447 /* Sew in builtin protocols. */
1448 list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1449 list_append(&protocol_list, &ip_conntrack_protocol_udp);
1450 list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1451 WRITE_UNLOCK(&ip_conntrack_lock);
1453 for (i = 0; i < ip_conntrack_htable_size; i++)
1454 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1456 /* For use by ipt_REJECT */
1457 ip_ct_attach = ip_conntrack_attach;
1459 /* Set up fake conntrack:
1460 - to never be deleted, not in any hashes */
1461 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1462 /* - and look it like as a confirmed connection */
1463 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1464 /* - and prepare the ctinfo field for REJECT & NAT. */
1465 ip_conntrack_untracked.infos[IP_CT_NEW].master =
1466 ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1467 ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1468 &ip_conntrack_untracked.ct_general;
1473 vfree(ip_conntrack_hash);
1475 nf_unregister_sockopt(&so_getorigdst);