/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *      - new API and handling of conntrack/nat helpers
 *      - now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *      - add usage/reference counts to ip_conntrack_expect
 *      - export ip_conntrack[_expect]_{find_get,put} functions
 * */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION    "2.1"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DECLARE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
static kmem_cache_t *ip_conntrack_expect_cachep;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;

DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

void
ip_conntrack_put(struct ip_conntrack *ct)
{
        IP_NF_ASSERT(ct);
        nf_conntrack_put(&ct->ct_general);
}

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;

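/* Hash a tuple into a bucket index: jhash over addresses, ports and
 * protocol, keyed with a random seed chosen at first use, modulo the
 * table size. */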
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
#if 0
        dump_tuple(tuple);
#endif
        return (jhash_3words(tuple->src.ip,
                             (tuple->dst.ip ^ tuple->dst.protonum),
                             (tuple->src.u.all | (tuple->dst.u.all << 16)),
                             ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
}

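/* Extract the original-direction tuple from an IP header; the layer 4
 * protocol fills in its part (e.g. ports) from the packet data. */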
int
ip_ct_get_tuple(const struct iphdr *iph,
                const struct sk_buff *skb,
                unsigned int dataoff,
                struct ip_conntrack_tuple *tuple,
                const struct ip_conntrack_protocol *protocol)
{
        /* Should never happen: fragments are reassembled before conntrack
           sees them. */
        if (iph->frag_off & htons(IP_OFFSET)) {
                printk("ip_conntrack_core: Frag of proto %u.\n",
                       iph->protocol);
                return 0;
        }

        tuple->src.ip = iph->saddr;
        tuple->dst.ip = iph->daddr;
        tuple->dst.protonum = iph->protocol;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;

        return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

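/* Build the reply-direction tuple: addresses swapped, direction flipped;
 * the layer 4 protocol inverts its own part (e.g. swaps the ports). */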
int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig,
                   const struct ip_conntrack_protocol *protocol)
{
        inverse->src.ip = orig->dst.ip;
        inverse->dst.ip = orig->src.ip;
        inverse->dst.protonum = orig->dst.protonum;
        inverse->dst.dir = !orig->dst.dir;

        return protocol->invert_tuple(inverse, orig);
}


/* ip_conntrack_expect helper functions */
static void destroy_expect(struct ip_conntrack_expect *exp)
{
        ip_conntrack_put(exp->master);
        IP_NF_ASSERT(!timer_pending(&exp->timeout));
        kmem_cache_free(ip_conntrack_expect_cachep, exp);
        CONNTRACK_STAT_INC(expect_delete);
}

static void unlink_expect(struct ip_conntrack_expect *exp)
{
        MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
        list_del(&exp->list);
        /* Logically in destroy_expect, but we hold the lock here. */
        exp->master->expecting--;
}

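/* Timer callback: the expectation was never matched within its helper's
 * timeout, so unlink it under the lock and free it. */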
static void expectation_timed_out(unsigned long ul_expect)
{
        struct ip_conntrack_expect *exp = (void *)ul_expect;

        WRITE_LOCK(&ip_conntrack_lock);
        unlink_expect(exp);
        WRITE_UNLOCK(&ip_conntrack_lock);
        destroy_expect(exp);
}

/* If an expectation for this connection is found, it is deleted from
 * the global list and returned. */
struct ip_conntrack_expect *
__ip_conntrack_exp_find(const struct ip_conntrack_tuple *tuple)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                /* If master is not in hash table yet (ie. packet hasn't left
                   this machine yet), how can other end know about expected?
                   Hence these are not the droids you are looking for (if
                   master ct never got confirmed, we'd hold a reference to it
                   and weird things would happen to future packets). */
                if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
                    && is_confirmed(i->master)
                    && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        return i;
                }
        }
        return NULL;
}

/* Delete all expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
        struct ip_conntrack_expect *i, *tmp;

        /* Optimization: most connections never expect any others. */
        if (ct->expecting == 0)
                return;

        list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
                if (i->master == ct && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        destroy_expect(i);
                }
        }
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
        unsigned int ho, hr;

        DEBUGP("clean_from_lists(%p)\n", ct);
        MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

        ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* Destroy all pending expectations */
        remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
        struct ip_conntrack_protocol *proto;

        DEBUGP("destroy_conntrack(%p)\n", ct);
        IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
        IP_NF_ASSERT(!timer_pending(&ct->timeout));

        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to ip_conntrack_lock!!! -HW */
        proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);

        if (ip_conntrack_destroyed)
                ip_conntrack_destroyed(ct);

        WRITE_LOCK(&ip_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
         * except TFTP can create an expectation on the first packet,
         * before connection is in the list, so we need to clean here,
         * too. */
        remove_expectations(ct);

        /* We overload first tuple to link into unconfirmed list. */
        if (!is_confirmed(ct)) {
                BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        }

        CONNTRACK_STAT_INC(delete);
        WRITE_UNLOCK(&ip_conntrack_lock);

        if (ct->master)
                ip_conntrack_put(ct->master);

        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        kmem_cache_free(ip_conntrack_cachep, ct);
        atomic_dec(&ip_conntrack_count);
}

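/* Timer callback: the connection timed out.  Unlink it from the hash
 * table (and drop its pending expectations), then drop the lists'
 * reference to it. */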
static void death_by_timeout(unsigned long ul_conntrack)
{
        struct ip_conntrack *ct = (void *)ul_conntrack;

        WRITE_LOCK(&ip_conntrack_lock);
        /* Inside lock so preempt is disabled on module removal path.
         * Otherwise we can get spurious warnings. */
        CONNTRACK_STAT_INC(delete_list);
        clean_from_lists(ct);
        WRITE_UNLOCK(&ip_conntrack_lock);
        ip_conntrack_put(ct);
}

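/* Chain-walk helper: true if this hash entry matches the tuple and does
 * not belong to ignored_conntrack (which may be NULL). */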
static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
                    const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        return tuplehash_to_ctrack(i) != ignored_conntrack
                && ip_ct_tuple_equal(tuple, &i->tuple);
}

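/* Look a tuple up in the hash table.  Caller must hold at least a read
 * lock on ip_conntrack_lock; no reference is taken. */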
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);

        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
                if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
                        CONNTRACK_STAT_INC(found);
                        return h;
                }
                CONNTRACK_STAT_INC(searched);
        }

        return NULL;
}

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
                      const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        READ_UNLOCK(&ip_conntrack_lock);

        return h;
}

/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
        unsigned int hash, repl_hash;
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        ct = ip_conntrack_get(*pskb, &ctinfo);

        /* ipt_REJECT uses ip_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction.  Actual packet
           which created connection will be IP_CT_NEW or for an
           expected connection, IP_CT_RELATED. */
        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
                return NF_ACCEPT;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        /* We're not in hash table, and we refuse to set up related
           connections for unconfirmed conns.  But packet copies and
           REJECT will give spurious warnings here. */
        /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

        /* No external references means no one else could have
           confirmed us. */
        IP_NF_ASSERT(!is_confirmed(ct));
        DEBUGP("Confirming conntrack %p\n", ct);

        WRITE_LOCK(&ip_conntrack_lock);

        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost the race. */
        if (!LIST_FIND(&ip_conntrack_hash[hash],
                       conntrack_tuple_cmp,
                       struct ip_conntrack_tuple_hash *,
                       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
            && !LIST_FIND(&ip_conntrack_hash[repl_hash],
                          conntrack_tuple_cmp,
                          struct ip_conntrack_tuple_hash *,
                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
                /* Remove from unconfirmed list */
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

                list_prepend(&ip_conntrack_hash[hash],
                             &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
                list_prepend(&ip_conntrack_hash[repl_hash],
                             &ct->tuplehash[IP_CT_DIR_REPLY]);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                CONNTRACK_STAT_INC(insert);
                WRITE_UNLOCK(&ip_conntrack_lock);
                return NF_ACCEPT;
        }

        CONNTRACK_STAT_INC(insert_failed);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return NF_DROP;
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
                         const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        READ_UNLOCK(&ip_conntrack_lock);

        return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
        return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}

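/* The table is full: try to free an unassured ("unreplied") entry from
 * this hash chain so the new connection can be tracked. */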
static int early_drop(struct list_head *chain)
{
        /* Traverse backwards: gives us oldest, which is roughly LRU */
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct = NULL;
        int dropped = 0;

        READ_LOCK(&ip_conntrack_lock);
        h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
        if (h) {
                ct = tuplehash_to_ctrack(h);
                atomic_inc(&ct->ct_general.use);
        }
        READ_UNLOCK(&ip_conntrack_lock);

        if (!ct)
                return dropped;

        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
                CONNTRACK_STAT_INC(early_drop);
        }
        ip_conntrack_put(ct);
        return dropped;
}

static inline int helper_cmp(const struct ip_conntrack_helper *i,
                             const struct ip_conntrack_tuple *rtuple)
{
        return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
        return LIST_FIND(&helpers, helper_cmp,
                         struct ip_conntrack_helper *,
                         tuple);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
               struct ip_conntrack_protocol *protocol,
               struct sk_buff *skb)
{
        struct ip_conntrack *conntrack;
        struct ip_conntrack_tuple repl_tuple;
        size_t hash;
        struct ip_conntrack_expect *exp;

        if (!ip_conntrack_hash_rnd_initted) {
                get_random_bytes(&ip_conntrack_hash_rnd, 4);
                ip_conntrack_hash_rnd_initted = 1;
        }

        hash = hash_conntrack(tuple);

        if (ip_conntrack_max
            && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
                /* Try dropping from this hash chain. */
                if (!early_drop(&ip_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "ip_conntrack: table full, dropping"
                                       " packet.\n");
                        return ERR_PTR(-ENOMEM);
                }
        }

        if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }

        conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
        if (!conntrack) {
                DEBUGP("Can't allocate conntrack.\n");
                return ERR_PTR(-ENOMEM);
        }

        memset(conntrack, 0, sizeof(*conntrack));
        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
        conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
        conntrack->xid[IP_CT_DIR_REPLY] = -1;
#endif
        if (!protocol->new(conntrack, skb)) {
                kmem_cache_free(ip_conntrack_cachep, conntrack);
                return NULL;
        }
        /* Don't set timer yet: wait for confirmation */
        init_timer(&conntrack->timeout);
        conntrack->timeout.data = (unsigned long)conntrack;
        conntrack->timeout.function = death_by_timeout;

        WRITE_LOCK(&ip_conntrack_lock);
        exp = __ip_conntrack_exp_find(tuple);

        if (exp) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                        conntrack, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
                conntrack->mark = exp->master->mark;
#endif
                nf_conntrack_get(&conntrack->master->ct_general);
                CONNTRACK_STAT_INC(expect_new);
        } else {
                conntrack->helper = ip_ct_find_helper(&repl_tuple);

                CONNTRACK_STAT_INC(new);
        }

        /* Overload tuple linked list to put us in unconfirmed list. */
        list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

        atomic_inc(&ip_conntrack_count);
        WRITE_UNLOCK(&ip_conntrack_lock);

        if (exp) {
                if (exp->expectfn)
                        exp->expectfn(conntrack, exp);
                destroy_expect(exp);
        }

        return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
                  struct ip_conntrack_protocol *proto,
                  int *set_reply,
                  unsigned int hooknum,
                  enum ip_conntrack_info *ctinfo)
{
        struct ip_conntrack_tuple tuple;
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack *ct;

        IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

        if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
                             &tuple, proto))
                return NULL;

        /* look for tuple match */
        h = ip_conntrack_find_get(&tuple, NULL);
        if (!h) {
                h = init_conntrack(&tuple, proto, skb);
                if (!h)
                        return NULL;
                if (IS_ERR(h))
                        return (void *)h;
        }
        ct = tuplehash_to_ctrack(h);

        /* It exists; we have (non-exclusive) reference. */
        if (DIRECTION(h) == IP_CT_DIR_REPLY) {
                *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
                /* Please set reply bit if this packet OK */
                *set_reply = 1;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: normal packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        DEBUGP("ip_conntrack_in: related packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
                        DEBUGP("ip_conntrack_in: new packet for %p\n",
                               ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
        }
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
}

/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
                             struct sk_buff **pskb,
                             const struct net_device *in,
                             const struct net_device *out,
                             int (*okfn)(struct sk_buff *))
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;
        struct ip_conntrack_protocol *proto;
        int set_reply;
        int ret;

        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
                CONNTRACK_STAT_INC(ignore);
                return NF_ACCEPT;
        }

        /* Should never happen: fragments are reassembled before this hook. */
        if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
                if (net_ratelimit()) {
                        printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
                               (*pskb)->nh.iph->protocol, hooknum);
                }
                return NF_DROP;
        }

        /* FIXME: Do this right please. --RR */
        (*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
        /* Ignore broadcast: no `connection'. */
        if ((*pskb)->pkt_type == PACKET_BROADCAST) {
                printk("Broadcast packet!\n");
                return NF_ACCEPT;
        } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
                   == htonl(0x000000FF)) {
                printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
                       NIPQUAD((*pskb)->nh.iph->saddr),
                       NIPQUAD((*pskb)->nh.iph->daddr),
                       (*pskb)->sk, (*pskb)->pkt_type);
        }
#endif

        proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

        /* It may be a special packet, error, unclean...
         * the inverse of the return code tells the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL
            && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
                CONNTRACK_STAT_INC(error);
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum, &ctinfo))) {
                /* Not valid part of a connection */
                CONNTRACK_STAT_INC(invalid);
                return NF_ACCEPT;
        }

        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
                CONNTRACK_STAT_INC(drop);
                return NF_DROP;
        }

        IP_NF_ASSERT((*pskb)->nfct);

        ret = proto->packet(ct, *pskb, ctinfo);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
                CONNTRACK_STAT_INC(invalid);
                return -ret;
        }

        if (set_reply)
                set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

        return ret;
}

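/* Invert a tuple, looking up the protocol handler from the tuple itself. */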
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig)
{
        return ip_ct_invert_tuple(inverse, orig,
                                  ip_ct_find_proto(orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
                               const struct ip_conntrack_expect *b)
{
        /* If the parts covered by the intersection of the masks are
           equal, the expectations clash. */
        struct ip_conntrack_tuple intersect_mask
                = { { a->mask.src.ip & b->mask.src.ip,
                      { a->mask.src.u.all & b->mask.src.u.all } },
                    { a->mask.dst.ip & b->mask.dst.ip,
                      { a->mask.dst.u.all & b->mask.dst.u.all },
                      a->mask.dst.protonum & b->mask.dst.protonum } };

        return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct ip_conntrack_expect *a,
                                 const struct ip_conntrack_expect *b)
{
        return a->master == b->master
                && ip_ct_tuple_equal(&a->tuple, &b->tuple)
                && ip_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
        struct ip_conntrack_expect *i;

        WRITE_LOCK(&ip_conntrack_lock);
        /* choose the oldest expectation to evict */
        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, exp) && del_timer(&i->timeout)) {
                        unlink_expect(i);
                        WRITE_UNLOCK(&ip_conntrack_lock);
                        destroy_expect(i);
                        return;
                }
        }
        WRITE_UNLOCK(&ip_conntrack_lock);
}

struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
{
        struct ip_conntrack_expect *new;

        new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
        }
        new->master = NULL;
        return new;
}

void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
{
        kmem_cache_free(ip_conntrack_expect_cachep, expect);
}

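/* Add an expectation to the global list: takes a reference on the master
 * conntrack and arms the helper's timeout (if it has one). */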
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
        atomic_inc(&exp->master->ct_general.use);
        exp->master->expecting++;
        list_add(&exp->list, &ip_conntrack_expect_list);

        if (exp->master->helper->timeout) {
                init_timer(&exp->timeout);
                exp->timeout.data = (unsigned long)exp;
                exp->timeout.function = expectation_timed_out;
                exp->timeout.expires
                        = jiffies + exp->master->helper->timeout * HZ;
                add_timer(&exp->timeout);
        } else
                exp->timeout.function = NULL;

        CONNTRACK_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
        struct ip_conntrack_expect *i;

        list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
                if (i->master == master) {
                        if (del_timer(&i->timeout)) {
                                unlink_expect(i);
                                destroy_expect(i);
                        }
                        break;
                }
        }
}

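/* Restart an expectation's timer.  Returns 0 if the timer had already
 * expired (the expectation is on its way out), 1 otherwise. */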
static inline int refresh_timer(struct ip_conntrack_expect *i)
{
        if (!del_timer(&i->timeout))
                return 0;

        i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
        add_timer(&i->timeout);
        return 1;
}

int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
        struct ip_conntrack_expect *i;
        int ret;

        DEBUGP("ip_conntrack_expect_related %p\n", expect);
        DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
        DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

        WRITE_LOCK(&ip_conntrack_lock);
        list_for_each_entry(i, &ip_conntrack_expect_list, list) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                /* We don't need the one they've given us. */
                                ip_conntrack_expect_free(expect);
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }

        /* Will be over limit? */
        if (expect->master->helper->max_expected &&
            expect->master->expecting >= expect->master->helper->max_expected)
                evict_oldest_expect(expect->master);

        ip_conntrack_expect_insert(expect);
        ret = 0;
out:
        WRITE_UNLOCK(&ip_conntrack_lock);
        return ret;
}

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
                              const struct ip_conntrack_tuple *newreply)
{
        WRITE_LOCK(&ip_conntrack_lock);
        /* Should be unconfirmed, so not in hash table yet */
        IP_NF_ASSERT(!is_confirmed(conntrack));

        DEBUGP("Altering reply tuple of %p to ", conntrack);
        DUMP_TUPLE(newreply);

        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
        if (!conntrack->master && conntrack->expecting == 0)
                conntrack->helper = ip_ct_find_helper(newreply);
        WRITE_UNLOCK(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
        BUG_ON(me->timeout == 0);
        WRITE_LOCK(&ip_conntrack_lock);
        list_prepend(&helpers, me);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return 0;
}

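/* Iterator callback: clear the helper pointer on any conntrack that was
 * using the helper being unregistered. */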
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
                         const struct ip_conntrack_helper *me)
{
        if (tuplehash_to_ctrack(i)->helper == me)
                tuplehash_to_ctrack(i)->helper = NULL;
        return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
        unsigned int i;
        struct ip_conntrack_expect *exp, *tmp;

        /* Need write lock here, to delete helper. */
        WRITE_LOCK(&ip_conntrack_lock);
        LIST_DELETE(&helpers, me);

        /* Get rid of expectations */
        list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
                if (exp->master->helper == me && del_timer(&exp->timeout)) {
                        unlink_expect(exp);
                        destroy_expect(exp);
                }
        }
        /* Get rid of remaining helper pointers: set them to NULL. */
        LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
        for (i = 0; i < ip_conntrack_htable_size; i++)
                LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
                            struct ip_conntrack_tuple_hash *, me);
        WRITE_UNLOCK(&ip_conntrack_lock);

        /* Someone could still be looking at the helper in a bh. */
        synchronize_net();
}

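/* Per-direction packet and byte accounting; a no-op unless
 * CONFIG_IP_NF_CT_ACCT is enabled. */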
static inline void ct_add_counters(struct ip_conntrack *ct,
                                   enum ip_conntrack_info ctinfo,
                                   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
        if (skb) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
                                        ntohs(skb->nh.iph->tot_len);
        }
#endif
}

/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
void ip_ct_refresh_acct(struct ip_conntrack *ct,
                        enum ip_conntrack_info ctinfo,
                        const struct sk_buff *skb,
                        unsigned long extra_jiffies)
{
        IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

        /* If not in hash table, timer will not be active yet */
        if (!is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                ct_add_counters(ct, ctinfo, skb);
        } else {
                WRITE_LOCK(&ip_conntrack_lock);
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                }
                ct_add_counters(ct, ctinfo, skb);
                WRITE_UNLOCK(&ip_conntrack_lock);
        }
}

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
#ifdef CONFIG_NETFILTER_DEBUG
        unsigned int olddebug = skb->nf_debug;
#endif

        skb_orphan(skb);

        local_bh_disable();
        skb = ip_defrag(skb, user);
        local_bh_enable();

        if (skb) {
                ip_send_check(skb->nh.iph);
                skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
                /* Packet path as if nothing had happened. */
                skb->nf_debug = olddebug;
#endif
        }

        return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;

        /* This ICMP is in reverse direction to the packet which caused it */
        ct = ip_conntrack_get(skb, &ctinfo);

        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;

        /* Attach to new skbuff, and increment count */
        nskb->nfct = &ct->ct_general;
        nskb->nfctinfo = ctinfo;
        nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
        int (*iter)(struct ip_conntrack *i, void *data),
        void *data)
{
        return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
                void *data, unsigned int *bucket)
{
        struct ip_conntrack_tuple_hash *h = NULL;

        WRITE_LOCK(&ip_conntrack_lock);
        for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
                h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
                if (h)
                        break;
        }
        if (!h)
                h = LIST_FIND_W(&unconfirmed, do_iter,
                                struct ip_conntrack_tuple_hash *, iter, data);
        if (h)
                atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
        WRITE_UNLOCK(&ip_conntrack_lock);

        return h;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
        struct ip_conntrack_tuple_hash *h;
        unsigned int bucket = 0;

        while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                ip_conntrack_put(ct);
        }
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ip_conntrack_tuple_hash *h;
        struct ip_conntrack_tuple tuple;

        IP_CT_TUPLE_U_BLANK(&tuple);
        tuple.src.ip = inet->rcv_saddr;
        tuple.src.u.tcp.port = inet->sport;
        tuple.dst.ip = inet->daddr;
        tuple.dst.u.tcp.port = inet->dport;
        tuple.dst.protonum = IPPROTO_TCP;

        /* We only do TCP at the moment: is there a better way? */
        if (strcmp(sk->sk_prot->name, "TCP")) {
                DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
                return -ENOPROTOOPT;
        }

        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
                DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
                       *len, sizeof(struct sockaddr_in));
                return -EINVAL;
        }

        h = ip_conntrack_find_get(&tuple, NULL);
        if (h) {
                struct sockaddr_in sin;
                struct ip_conntrack *ct = tuplehash_to_ctrack(h);

                sin.sin_family = AF_INET;
                sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.u.tcp.port;
                sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                        .tuple.dst.ip;

                DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
                       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
                ip_conntrack_put(ct);
                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
                        return -EFAULT;
                else
                        return 0;
        }
        DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
               NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
               NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
        return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
        .pf             = PF_INET,
        .get_optmin     = SO_ORIGINAL_DST,
        .get_optmax     = SO_ORIGINAL_DST+1,
        .get            = &getorigdst,
};

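/* Iterator for ip_ct_iterate_cleanup: match every conntrack, so the whole
 * table is flushed. */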
static int kill_all(struct ip_conntrack *i, void *data)
{
        return 1;
}

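/* Free the hash table, whichever way it was allocated (vmalloc or pages). */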
static void free_conntrack_hash(void)
{
        if (ip_conntrack_vmalloc)
                vfree(ip_conntrack_hash);
        else
                free_pages((unsigned long)ip_conntrack_hash,
                           get_order(sizeof(struct list_head)
                                     * ip_conntrack_htable_size));
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
        ip_ct_attach = NULL;
        /* This makes sure all current packets have passed through
           netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

 i_see_dead_people:
        ip_ct_iterate_cleanup(kill_all, NULL);
        if (atomic_read(&ip_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }

        kmem_cache_destroy(ip_conntrack_cachep);
        kmem_cache_destroy(ip_conntrack_expect_cachep);
        free_conntrack_hash();
        nf_unregister_sockopt(&so_getorigdst);
}

static int hashsize;
module_param(hashsize, int, 0400);

int __init ip_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
         * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
        if (hashsize) {
                ip_conntrack_htable_size = hashsize;
        } else {
                ip_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        ip_conntrack_htable_size = 8192;
                if (ip_conntrack_htable_size < 16)
                        ip_conntrack_htable_size = 16;
        }
        ip_conntrack_max = 8 * ip_conntrack_htable_size;

        printk("ip_conntrack version %s (%u buckets, %d max)"
               " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
               ip_conntrack_htable_size, ip_conntrack_max,
               sizeof(struct ip_conntrack));

        ret = nf_register_sockopt(&so_getorigdst);
        if (ret != 0) {
                printk(KERN_ERR "Unable to register netfilter socket option\n");
                return ret;
        }

        /* AK: the hash table is twice as big as needed because it
           uses list_head.  It would be much nicer for caches to use a
           single-pointer list head here. */
        ip_conntrack_vmalloc = 0;
        ip_conntrack_hash
                = (void *)__get_free_pages(GFP_KERNEL,
                                           get_order(sizeof(struct list_head)
                                                     * ip_conntrack_htable_size));
        if (!ip_conntrack_hash) {
                ip_conntrack_vmalloc = 1;
                printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
                ip_conntrack_hash = vmalloc(sizeof(struct list_head)
                                            * ip_conntrack_htable_size);
        }
        if (!ip_conntrack_hash) {
                printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
                goto err_unreg_sockopt;
        }

        ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
                                                sizeof(struct ip_conntrack), 0,
                                                0, NULL, NULL);
        if (!ip_conntrack_cachep) {
                printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
                goto err_free_hash;
        }

        ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
                                        sizeof(struct ip_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!ip_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create ip_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED lock here, but good form anyway. */
        WRITE_LOCK(&ip_conntrack_lock);
        for (i = 0; i < MAX_IP_CT_PROTO; i++)
                ip_ct_protos[i] = &ip_conntrack_generic_protocol;
        /* Sew in builtin protocols. */
        ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
        ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
        ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
        WRITE_UNLOCK(&ip_conntrack_lock);

        for (i = 0; i < ip_conntrack_htable_size; i++)
                INIT_LIST_HEAD(&ip_conntrack_hash[i]);

        /* For use by ipt_REJECT */
        ip_ct_attach = ip_conntrack_attach;

        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */
        atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
        /*  - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
        free_conntrack_hash();
err_unreg_sockopt:
        nf_unregister_sockopt(&so_getorigdst);

        return -ENOMEM;
}