/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, and conntrack timers. */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION	"2.4"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DEFINE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep __read_mostly;
static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

static unsigned int ip_conntrack_next_id;
static unsigned int ip_conntrack_expect_next_id;
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);

DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
					   ecache->ct);
	ecache->events = 0;
	ip_conntrack_put(ecache->ct);
	ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling or freeing the skb */
void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	if (ecache->ct == ct)
		__ip_ct_deliver_cached_events(ecache);
	local_bh_enable();
}
void __ip_ct_event_cache_init(struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__ip_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void ip_ct_event_cache_flush(void)
{
	struct ip_conntrack_ecache *ecache;
	int cpu;

	for_each_possible_cpu(cpu) {
		ecache = &per_cpu(ip_conntrack_ecache, cpu);
		if (ecache->ct)
			ip_conntrack_put(ecache->ct);
	}
}
#else
static inline void ip_ct_event_cache_flush(void) {}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
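/*
 * Illustrative usage sketch (not part of this file): protocol and helper
 * code batches events for the current packet with ip_conntrack_event_cache(),
 * and the per-CPU cache above delivers them in one notifier call once the
 * packet has been processed:
 *
 *	ip_conntrack_event_cache(IPCT_PROTOINFO, *pskb);
 *	...
 *	ip_ct_deliver_cached_events(ct);
 */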
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	return (jhash_3words(tuple->src.ip,
			     (tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     rnd) % size);
}

static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, ip_conntrack_htable_size,
				ip_conntrack_hash_rnd);
}
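/*
 * Worked example (illustrative values): for a TCP tuple
 * 192.168.0.2:1025 -> 10.0.0.1:80 the bucket is, per the code above,
 *
 *	jhash_3words(saddr, daddr ^ IPPROTO_TCP,
 *		     sport | (dport << 16), rnd) % size
 *
 * The original and reply tuples hash independently, so the two
 * directions of one connection normally occupy two different buckets.
 */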
static int
ip_ct_get_tuple(const struct iphdr *iph,
		const struct sk_buff *skb,
		unsigned int dataoff,
		struct ip_conntrack_tuple *tuple,
		const struct ip_conntrack_protocol *protocol)
{
	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

static int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig,
		   const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	return protocol->invert_tuple(inverse, orig);
}
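/*
 * Worked example: inverting the original TCP tuple
 *	src = 192.168.0.2:1025, dst = 10.0.0.1:80
 * yields the reply-direction tuple
 *	src = 10.0.0.1:80, dst = 192.168.0.2:1025
 * where protocol->invert_tuple() swaps the layer-4 port numbers.
 */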
/* ip_conntrack_expect helper functions */
void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
{
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
	IP_NF_ASSERT(!timer_pending(&exp->timeout));
	list_del(&exp->list);
	CONNTRACK_STAT_INC(expect_delete);
	exp->master->expecting--;
	ip_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&ip_conntrack_lock);
	ip_ct_unlink_expect(exp);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_expect_put(exp);
}
struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			atomic_inc(&i->use);
			return i;
		}
	}
	return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	read_lock_bh(&ip_conntrack_lock);
	i = __ip_conntrack_expect_find(tuple);
	read_unlock_bh(&ip_conntrack_lock);

	return i;
}
/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		/* If master is not in hash table yet (ie. packet hasn't left
		   this machine yet), how can the other end know about the
		   expected connection?  Hence these are not the droids you
		   are looking for (if master ct never got confirmed, we'd
		   hold a reference to it and weird things would happen to
		   future packets). */
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && is_confirmed(i->master)) {
			if (i->flags & IP_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}
/* delete all expectations for this conntrack */
void ip_ct_remove_expectations(struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *i, *tmp;

	/* Optimization: most connections never expect any others. */
	if (ct->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			ip_conntrack_expect_put(i);
		}
	}
}
static void
clean_from_lists(struct ip_conntrack *ct)
{
	unsigned int ho, hr;

	DEBUGP("clean_from_lists(%p)\n", ct);
	ASSERT_WRITE_LOCK(&ip_conntrack_lock);

	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all pending expectations */
	ip_ct_remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	ip_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	CONNTRACK_STAT_INC(delete);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->master)
		ip_conntrack_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	ip_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	CONNTRACK_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}
static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
		    const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	ASSERT_READ_LOCK(&ip_conntrack_lock);
	return tuplehash_to_ctrack(i) != ignored_conntrack
		&& ip_ct_tuple_equal(tuple, &i->tuple);
}
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	ASSERT_READ_LOCK(&ip_conntrack_lock);
	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
			CONNTRACK_STAT_INC(found);
			return h;
		}
		CONNTRACK_STAT_INC(searched);
	}

	return NULL;
}
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&ip_conntrack_lock);

	return h;
}
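/*
 * Illustrative caller sketch: a successful lookup takes a reference,
 * so every hit must eventually be balanced with ip_conntrack_put():
 *
 *	h = ip_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
 *		...
 *		ip_conntrack_put(ct);
 *	}
 */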
static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++ip_conntrack_next_id;
	list_prepend(&ip_conntrack_hash[hash],
		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_prepend(&ip_conntrack_hash[repl_hash],
		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void ip_conntrack_hash_insert(struct ip_conntrack *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&ip_conntrack_lock);
	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&ip_conntrack_lock);
}
/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = ip_conntrack_get(*pskb, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&ip_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&ip_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct ip_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct ip_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		/* Remove from unconfirmed list */
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

		__ip_conntrack_hash_insert(ct, hash, repl_hash);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		CONNTRACK_STAT_INC(insert);
		write_unlock_bh(&ip_conntrack_lock);
		if (ct->helper)
			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
		ip_conntrack_event_cache(master_ct(ct) ?
					 IPCT_RELATED : IPCT_NEW, *pskb);

		return NF_ACCEPT;
	}

	CONNTRACK_STAT_INC(insert_failed);
	write_unlock_bh(&ip_conntrack_lock);

	return NF_DROP;
}
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&ip_conntrack_lock);

	return h != NULL;
}
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct = NULL;
	int dropped = 0;

	read_lock_bh(&ip_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
	if (h) {
		ct = tuplehash_to_ctrack(h);
		atomic_inc(&ct->ct_general.use);
	}
	read_unlock_bh(&ip_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		CONNTRACK_STAT_INC(early_drop);
	}
	ip_conntrack_put(ct);
	return dropped;
}
static inline int helper_cmp(const struct ip_conntrack_helper *i,
			     const struct ip_conntrack_tuple *rtuple)
{
	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct ip_conntrack_helper *
__ip_conntrack_helper_find(const struct ip_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct ip_conntrack_helper *,
			 tuple);
}

struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *helper;

	/* need ip_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&ip_conntrack_lock);
	helper = __ip_conntrack_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}
	read_unlock_bh(&ip_conntrack_lock);

	return helper;
}

void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
	module_put(helper->me);
}
struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
	return ip_ct_protos[protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	preempt_disable();
	p = __ip_conntrack_proto_find(protocol);
	if (p) {
		if (!try_module_get(p->me))
			p = &ip_conntrack_generic_protocol;
	}
	preempt_enable();

	return p;
}

void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{
	module_put(p->me);
}
struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
					struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *conntrack;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(conntrack, 0, sizeof(*conntrack));
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
	conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
	conntrack->xid[IP_CT_DIR_REPLY] = -1;
	conntrack->priority = (u_int32_t)-1;
#endif
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	atomic_inc(&ip_conntrack_count);

	return conntrack;
}
void
ip_conntrack_free(struct ip_conntrack *conntrack)
{
	atomic_dec(&ip_conntrack_count);
	kmem_cache_free(ip_conntrack_cachep, conntrack);
}
/* Allocate a new conntrack: we return -ENOMEM if classification
 * failed due to stress.  Otherwise it really is unclassifiable */
static struct ip_conntrack_tuple_hash *
init_conntrack(struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	struct ip_conntrack_expect *exp;

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
	if (conntrack == NULL || IS_ERR(conntrack))
		return (struct ip_conntrack_tuple_hash *)conntrack;

	if (!protocol->new(conntrack, skb)) {
		ip_conntrack_free(conntrack);
		return NULL;
	}

	write_lock_bh(&ip_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
		       conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place to put it */
		conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		CONNTRACK_STAT_INC(expect_new);
	} else {
		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);

		CONNTRACK_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&ip_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		ip_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
			     &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet is OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

	/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC(error);
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
				     &ctinfo))) {
		/* Not valid part of a connection */
		CONNTRACK_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return ip_ct_invert_tuple(inverse, orig,
				  __ip_conntrack_proto_find(orig->dst.protonum));
}
/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
			       const struct ip_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { a->mask.src.ip & b->mask.src.ip,
		      { a->mask.src.u.all & b->mask.src.u.all } },
		    { a->mask.dst.ip & b->mask.dst.ip,
		      { a->mask.dst.u.all & b->mask.dst.u.all },
		      a->mask.dst.protonum & b->mask.dst.protonum } };

	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
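/*
 * Worked example: two expectations clash only where both masks care.
 * If a masks src.ip with 0xffffffff but b masks it with 0x00000000,
 * the intersected mask ignores src.ip entirely, so differing source
 * addresses alone cannot keep the expectations apart; the comparison
 * is decided by the remaining fields both sides mask in.
 */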
static inline int expect_matches(const struct ip_conntrack_expect *a,
				 const struct ip_conntrack_expect *b)
{
	return a->master == b->master
		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
		&& ip_ct_tuple_equal(&a->mask, &b->mask);
}
/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
	struct ip_conntrack_expect *i;

	write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			write_unlock_bh(&ip_conntrack_lock);
			ip_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&ip_conntrack_lock);
}
/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}
	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(ip_conntrack_expect_cachep, exp);
}
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
	atomic_inc(&exp->use);
	exp->master->expecting++;
	list_add(&exp->list, &ip_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++ip_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	CONNTRACK_STAT_INC(expect_create);
}
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				ip_conntrack_expect_put(i);
			}
			break;
		}
	}
}

static inline int refresh_timer(struct ip_conntrack_expect *i)
{
	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
	struct ip_conntrack_expect *i;
	int ret;

	DEBUGP("ip_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	write_lock_bh(&ip_conntrack_lock);
	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	/* Will we be over the limit? */
	if (expect->master->helper->max_expected &&
	    expect->master->expecting >= expect->master->helper->max_expected)
		evict_oldest_expect(expect->master);

	ip_conntrack_expect_insert(expect);
	ip_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&ip_conntrack_lock);
	return ret;
}
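/*
 * Typical helper usage (illustrative sketch, error handling trimmed):
 * an ALG such as the FTP helper sets up the expected data connection
 * from its help() callback:
 *
 *	exp = ip_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	exp->tuple = ...;		(expected reply-side tuple)
 *	exp->mask = ...;		(which fields must match)
 *	exp->expectfn = NULL;
 *	exp->flags = 0;
 *	if (ip_conntrack_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	ip_conntrack_expect_put(exp);
 */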
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			      const struct ip_conntrack_tuple *newreply)
{
	write_lock_bh(&ip_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && conntrack->expecting == 0)
		conntrack->helper = __ip_conntrack_helper_find(newreply);
	write_unlock_bh(&ip_conntrack_lock);
}
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	BUG_ON(me->timeout == 0);
	write_lock_bh(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	write_unlock_bh(&ip_conntrack_lock);

	return 0;
}

struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *name)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}

	return NULL;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (tuplehash_to_ctrack(i)->helper == me) {
		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
		tuplehash_to_ctrack(i)->helper = NULL;
	}
	return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;
	struct ip_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
		if (exp->master->helper == me && del_timer(&exp->timeout)) {
			ip_ct_unlink_expect(exp);
			ip_conntrack_expect_put(exp);
		}
	}
	/* Get rid of expecteds, set helpers to NULL. */
	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash *, me);
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	write_unlock_bh(&ip_conntrack_lock);

	/* Someone could still be looking at the helper in a bh. */
	synchronize_net();
}
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __ip_ct_refresh_acct(struct ip_conntrack *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
	IP_NF_ASSERT(skb);

	write_lock_bh(&ip_conntrack_lock);

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_IP_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
						ntohs(skb->nh.iph->tot_len);
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&ip_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		ip_conntrack_event_cache(event, skb);
}
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct ip_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct ip_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	t->src.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif
/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
	skb_orphan(skb);

	local_bh_disable();
	skb = ip_defrag(skb, user);
	local_bh_enable();

	if (skb)
		ip_send_check(skb->nh.iph);
	return skb;
}
/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = ip_conntrack_get(skb, &ctinfo);

	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}
static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
	int (*iter)(struct ip_conntrack *i, void *data),
	void *data)
{
	return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h = NULL;

	write_lock_bh(&ip_conntrack_lock);
	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
				struct ip_conntrack_tuple_hash *, iter, data);
		if (h)
			break;
	}
	if (!h)
		h = LIST_FIND_W(&unconfirmed, do_iter,
				struct ip_conntrack_tuple_hash *, iter, data);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	write_unlock_bh(&ip_conntrack_lock);

	return h;
}
void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		ip_conntrack_put(ct);
	}
}
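/*
 * Example predicate (hypothetical sketch): kill every conntrack carrying
 * a given mark, the same way ip_conntrack_flush() below uses kill_all():
 *
 *	static int kill_by_mark(struct ip_conntrack *ct, void *data)
 *	{
 *		return ct->mark == *(u_int32_t *)data;
 *	}
 *	...
 *	ip_ct_iterate_cleanup(kill_by_mark, &mark);
 *
 * (ct->mark only exists with CONFIG_IP_NF_CONNTRACK_MARK.)
 */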
/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);

		sin.sin_family = AF_INET;
		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;
		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(ct);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}
static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};
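/*
 * Userspace side (illustrative): a transparent proxy recovers the
 * pre-NAT destination of an accepted TCP connection like this (what it
 * then does with the address is application-specific):
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
 *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
 *		       ntohs(dst.sin_port));
 */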
static int kill_all(struct ip_conntrack *i, void *data)
{
	return 1;
}

void ip_conntrack_flush(void)
{
	ip_ct_iterate_cleanup(kill_all, NULL);
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	ip_ct_attach = NULL;

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	ip_ct_event_cache_flush();
 i_see_dead_people:
	ip_conntrack_flush();
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to ip_conntrack_untracked are dropped */
	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
		schedule();

	kmem_cache_destroy(ip_conntrack_cachep);
	kmem_cache_destroy(ip_conntrack_expect_cachep);
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
	nf_unregister_sockopt(&so_getorigdst);
}
static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void *)__get_free_pages(GFP_KERNEL,
					get_order(sizeof(struct list_head)
						  * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
static int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct ip_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!ip_conntrack_htable_size)
		return param_set_int(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		while (!list_empty(&ip_conntrack_hash[i])) {
			h = list_entry(ip_conntrack_hash[i].next,
				       struct ip_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = ip_conntrack_htable_size;
	old_vmalloced = ip_conntrack_vmalloc;
	old_hash = ip_conntrack_hash;

	ip_conntrack_htable_size = hashsize;
	ip_conntrack_vmalloc = vmalloced;
	ip_conntrack_hash = hash;
	ip_conntrack_hash_rnd = rnd;
	write_unlock_bh(&ip_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}
module_param_call(hashsize, set_hashsize, param_get_uint,
		  &ip_conntrack_htable_size, 0600);
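/*
 * Usage note (assuming sysfs is mounted): the table can be resized at
 * runtime by root, or sized once at module load / boot time:
 *
 *	echo 16384 > /sys/module/ip_conntrack/parameters/hashsize
 *	modprobe ip_conntrack hashsize=16384
 */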
int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!ip_conntrack_htable_size) {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;
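	/*
	 * Worked example of the sizing rule above: on a 32MB i386 box,
	 * num_physpages << PAGE_SHIFT is 32MB; 32MB / 16384 = 2048 bytes
	 * of bucket heads, and 2048 / sizeof(struct list_head) (8 bytes)
	 * = 256 buckets, giving ip_conntrack_max = 8 * 256 = 2048.
	 */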

	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
					    &ip_conntrack_vmalloc);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						0, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
					sizeof(struct ip_conntrack_expect),
					0, 0, NULL, NULL);
	if (!ip_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < MAX_IP_CT_PROTO; i++)
		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
	/* Sew in builtin protocols. */
	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
	write_unlock_bh(&ip_conntrack_lock);

	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}