vserver 1.9.3
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 00a89f4..67caf5f 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -34,8 +34,9 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
-/* For ERR_PTR().  Yeah, I know... --RR */
-#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
 
 /* This rwlock protects the main hash table, protocol/helper/expected
    registrations, conntrack timers*/
 DECLARE_RWLOCK(ip_conntrack_lock);
 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
 
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ip_conntrack_count);
+
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
-LIST_HEAD(protocol_list);
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
-static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep;
+static kmem_cache_t *ip_conntrack_expect_cachep;
 struct ip_conntrack ip_conntrack_untracked;
+unsigned int ip_ct_log_invalid;
 
-extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
-
-static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
-                             u_int8_t protocol)
-{
-       return protocol == curr->proto;
-}
-
-struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
-{
-       struct ip_conntrack_protocol *p;
-
-       MUST_BE_READ_LOCKED(&ip_conntrack_lock);
-       p = LIST_FIND(&protocol_list, proto_cmpfn,
-                     struct ip_conntrack_protocol *, protocol);
-       if (!p)
-               p = &ip_conntrack_generic_protocol;
-
-       return p;
-}
-
-struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
-{
-       struct ip_conntrack_protocol *p;
-
-       READ_LOCK(&ip_conntrack_lock);
-       p = __ip_ct_find_proto(protocol);
-       READ_UNLOCK(&ip_conntrack_lock);
-       return p;
-}
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
 inline void 
 ip_conntrack_put(struct ip_conntrack *ct)
 {
        IP_NF_ASSERT(ct);
-       IP_NF_ASSERT(ct->infos[0].master);
-       /* nf_conntrack_put wants to go via an info struct, so feed it
-           one at random. */
-       nf_conntrack_put(&ct->infos[0]);
+       nf_conntrack_put(&ct->ct_general);
 }
 
 static int ip_conntrack_hash_rnd_initted;
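
/*
 * Illustrative sketch, not part of this patch: with the protocol_list walk
 * removed above, looking a tracker up in the new ip_ct_protos[] array
 * reduces to an index.  Assuming every unused slot is pre-filled with
 * ip_conntrack_generic_protocol (as ip_conntrack_init() does further down),
 * the replacement lookup is presumably along these lines:
 */
struct ip_conntrack_protocol *example_ct_find_proto(u_int8_t protocol)
{
	/* every 8-bit protocol number is a valid index, so no bounds
	   check is needed; module lifetime issues are ignored here */
	return ip_ct_protos[protocol];
}
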
@@ -127,11 +101,11 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple)
 }
 
 int
-get_tuple(const struct iphdr *iph,
-         const struct sk_buff *skb,
-         unsigned int dataoff,
-         struct ip_conntrack_tuple *tuple,
-         const struct ip_conntrack_protocol *protocol)
+ip_ct_get_tuple(const struct iphdr *iph,
+               const struct sk_buff *skb,
+               unsigned int dataoff,
+               struct ip_conntrack_tuple *tuple,
+               const struct ip_conntrack_protocol *protocol)
 {
        /* Never happen */
        if (iph->frag_off & htons(IP_OFFSET)) {
@@ -147,10 +121,10 @@ get_tuple(const struct iphdr *iph,
        return protocol->pkt_to_tuple(skb, dataoff, tuple);
 }
 
-static int
-invert_tuple(struct ip_conntrack_tuple *inverse,
-            const struct ip_conntrack_tuple *orig,
-            const struct ip_conntrack_protocol *protocol)
+int
+ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
+                  const struct ip_conntrack_tuple *orig,
+                  const struct ip_conntrack_protocol *protocol)
 {
        inverse->src.ip = orig->dst.ip;
        inverse->dst.ip = orig->src.ip;
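
/*
 * Illustrative sketch, not part of this patch: ip_ct_invert_tuple() swaps
 * the IP addresses itself and defers the layer-4 half to the protocol's
 * ->invert_tuple() callback.  For a port-based tracker that callback is
 * roughly the following (a hypothetical example modelled on TCP/UDP):
 */
static int example_invert_tuple(struct ip_conntrack_tuple *tuple,
				const struct ip_conntrack_tuple *orig)
{
	/* mirror source and destination ports */
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return 1;
}
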
@@ -177,7 +151,8 @@ destroy_expect(struct ip_conntrack_expect *exp)
        IP_NF_ASSERT(atomic_read(&exp->use) == 0);
        IP_NF_ASSERT(!timer_pending(&exp->timeout));
 
-       kfree(exp);
+       kmem_cache_free(ip_conntrack_expect_cachep, exp);
+       CONNTRACK_STAT_INC(expect_delete);
 }
 
 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
@@ -336,7 +311,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
                        list_del(&ct->master->expected_list);
                        master = ct->master->expectant;
                }
-               kfree(ct->master);
+               kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
        }
        WRITE_UNLOCK(&ip_conntrack_lock);
 
@@ -346,12 +321,15 @@ destroy_conntrack(struct nf_conntrack *nfct)
        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        kmem_cache_free(ip_conntrack_cachep, ct);
        atomic_dec(&ip_conntrack_count);
+       CONNTRACK_STAT_INC(delete);
 }
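
/*
 * Illustrative sketch, not part of this patch: CONNTRACK_STAT_INC() is
 * defined in the ip_conntrack headers rather than in this hunk.  Given the
 * DEFINE_PER_CPU(ip_conntrack_stat) above, it presumably boils down to a
 * per-CPU counter bump that needs neither an atomic op nor a lock:
 */
#define EXAMPLE_CONNTRACK_STAT_INC(count) \
	(per_cpu(ip_conntrack_stat, smp_processor_id()).count++)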
 
 static void death_by_timeout(unsigned long ul_conntrack)
 {
        struct ip_conntrack *ct = (void *)ul_conntrack;
 
+       CONNTRACK_STAT_INC(delete_list);
+
        WRITE_LOCK(&ip_conntrack_lock);
        clean_from_lists(ct);
        WRITE_UNLOCK(&ip_conntrack_lock);
@@ -374,13 +352,19 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 {
        struct ip_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);
+       /* use per_cpu() to avoid multiple calls to smp_processor_id() */
+       unsigned int cpu = smp_processor_id();
 
        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
-       h = LIST_FIND(&ip_conntrack_hash[hash],
-                     conntrack_tuple_cmp,
-                     struct ip_conntrack_tuple_hash *,
-                     tuple, ignored_conntrack);
-       return h;
+       list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+               if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+                       per_cpu(ip_conntrack_stat, cpu).found++;
+                       return h;
+               }
+               per_cpu(ip_conntrack_stat, cpu).searched++;
+       }
+
+       return NULL;
 }
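
/*
 * Illustrative sketch, not part of this patch: the found/searched counters
 * bumped above are per CPU, so any reader (for instance a /proc seq_file
 * handler) has to sum them across CPUs, roughly like this:
 */
static unsigned int example_sum_found(void)
{
	unsigned int cpu, sum = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_possible(cpu))
			continue;
		sum += per_cpu(ip_conntrack_stat, cpu).found;
	}
	return sum;
}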
 
 /* Find a connection corresponding to a tuple. */
@@ -399,36 +383,15 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
        return h;
 }
 
-static inline struct ip_conntrack *
-__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
-{
-       struct ip_conntrack *ct
-               = (struct ip_conntrack *)nfct->master;
-
-       /* ctinfo is the index of the nfct inside the conntrack */
-       *ctinfo = nfct - ct->infos;
-       IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
-       return ct;
-}
-
-/* Return conntrack and conntrack_info given skb->nfct->master */
-struct ip_conntrack *
-ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
-{
-       if (skb->nfct) 
-               return __ip_conntrack_get(skb->nfct, ctinfo);
-       return NULL;
-}
-
-/* Confirm a connection given skb->nfct; places it in hash table */
+/* Confirm a connection given skb; places it in hash table */
 int
-__ip_conntrack_confirm(struct nf_ct_info *nfct)
+__ip_conntrack_confirm(struct sk_buff *skb)
 {
        unsigned int hash, repl_hash;
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;
 
-       ct = __ip_conntrack_get(nfct, &ctinfo);
+       ct = ip_conntrack_get(skb, &ctinfo);
 
        /* ipt_REJECT uses ip_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction.  Actual packet
@@ -474,10 +437,12 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct)
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                WRITE_UNLOCK(&ip_conntrack_lock);
+               CONNTRACK_STAT_INC(insert);
                return NF_ACCEPT;
        }
 
        WRITE_UNLOCK(&ip_conntrack_lock);
+       CONNTRACK_STAT_INC(insert_failed);
        return NF_DROP;
 }
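
/*
 * Illustrative sketch, not part of this patch: with the per-ctinfo infos[]
 * array gone, skb->nfct points straight at the conntrack's ct_general and
 * the relationship lives in the new skb->nfctinfo field.  The
 * ip_conntrack_get() used above is no longer defined in this file; it
 * presumably reduces to something like:
 */
static inline struct ip_conntrack *
example_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
	*ctinfo = skb->nfctinfo;
	return (struct ip_conntrack *)skb->nfct;
}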
 
@@ -496,83 +461,6 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
        return h != NULL;
 }
 
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-struct ip_conntrack *
-icmp_error_track(struct sk_buff *skb,
-                enum ip_conntrack_info *ctinfo,
-                unsigned int hooknum)
-{
-       struct ip_conntrack_tuple innertuple, origtuple;
-       struct {
-               struct icmphdr icmp;
-               struct iphdr ip;
-       } inside;
-       struct ip_conntrack_protocol *innerproto;
-       struct ip_conntrack_tuple_hash *h;
-       int dataoff;
-
-       IP_NF_ASSERT(skb->nfct == NULL);
-
-       /* Not enough header? */
-       if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
-               return NULL;
-
-       if (inside.icmp.type != ICMP_DEST_UNREACH
-           && inside.icmp.type != ICMP_SOURCE_QUENCH
-           && inside.icmp.type != ICMP_TIME_EXCEEDED
-           && inside.icmp.type != ICMP_PARAMETERPROB
-           && inside.icmp.type != ICMP_REDIRECT)
-               return NULL;
-
-       /* Ignore ICMP's containing fragments (shouldn't happen) */
-       if (inside.ip.frag_off & htons(IP_OFFSET)) {
-               DEBUGP("icmp_error_track: fragment of proto %u\n",
-                      inside.ip.protocol);
-               return NULL;
-       }
-
-       innerproto = ip_ct_find_proto(inside.ip.protocol);
-       dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
-       /* Are they talking about one of our connections? */
-       if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
-               DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
-               return NULL;
-       }
-
-       /* Ordinarily, we'd expect the inverted tupleproto, but it's
-          been preserved inside the ICMP. */
-       if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
-               DEBUGP("icmp_error_track: Can't invert tuple\n");
-               return NULL;
-       }
-
-       *ctinfo = IP_CT_RELATED;
-
-       h = ip_conntrack_find_get(&innertuple, NULL);
-       if (!h) {
-               /* Locally generated ICMPs will match inverted if they
-                  haven't been SNAT'ed yet */
-               /* FIXME: NAT code has to handle half-done double NAT --RR */
-               if (hooknum == NF_IP_LOCAL_OUT)
-                       h = ip_conntrack_find_get(&origtuple, NULL);
-
-               if (!h) {
-                       DEBUGP("icmp_error_track: no match\n");
-                       return NULL;
-               }
-               /* Reverse direction from that found */
-               if (DIRECTION(h) != IP_CT_DIR_REPLY)
-                       *ctinfo += IP_CT_IS_REPLY;
-       } else {
-               if (DIRECTION(h) == IP_CT_DIR_REPLY)
-                       *ctinfo += IP_CT_IS_REPLY;
-       }
-
-       /* Update skb to refer to this connection */
-       skb->nfct = &h->ctrack->infos[*ctinfo];
-       return h->ctrack;
-}
-
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
@@ -598,6 +486,7 @@ static int early_drop(struct list_head *chain)
        if (del_timer(&h->ctrack->timeout)) {
                death_by_timeout((unsigned long)h->ctrack);
                dropped = 1;
+               CONNTRACK_STAT_INC(early_drop);
        }
        ip_conntrack_put(h->ctrack);
        return dropped;
@@ -627,8 +516,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
        struct ip_conntrack_tuple repl_tuple;
        size_t hash;
        struct ip_conntrack_expect *expected;
-       int i;
-       static unsigned int drop_next;
 
        if (!ip_conntrack_hash_rnd_initted) {
                get_random_bytes(&ip_conntrack_hash_rnd, 4);
@@ -637,15 +524,10 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 
        hash = hash_conntrack(tuple);
 
-       if (ip_conntrack_max &&
-           atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
-               /* Try dropping from random chain, or else from the
-                   chain about to put into (in case they're trying to
-                   bomb one hash chain). */
-               unsigned int next = (drop_next++)%ip_conntrack_htable_size;
-
-               if (!early_drop(&ip_conntrack_hash[next])
-                   && !early_drop(&ip_conntrack_hash[hash])) {
+       if (ip_conntrack_max
+           && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+               /* Try dropping from this hash chain. */
+               if (!early_drop(&ip_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "ip_conntrack: table full, dropping"
@@ -654,7 +536,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
                }
        }
 
-       if (!invert_tuple(&repl_tuple, tuple, protocol)) {
+       if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }
@@ -672,9 +554,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
        conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
-       for (i=0; i < IP_CT_NUMBER; i++)
-               conntrack->infos[i].master = &conntrack->ct_general;
-
        if (!protocol->new(conntrack, skb)) {
                kmem_cache_free(ip_conntrack_cachep, conntrack);
                return NULL;
@@ -693,41 +572,53 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
                             struct ip_conntrack_expect *, tuple);
        READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
 
-       /* If master is not in hash table yet (ie. packet hasn't left
-          this machine yet), how can other end know about expected?
-          Hence these are not the droids you are looking for (if
-          master ct never got confirmed, we'd hold a reference to it
-          and weird things would happen to future packets). */
-       if (expected && !is_confirmed(expected->expectant))
-               expected = NULL;
-
-       /* Look up the conntrack helper for master connections only */
-       if (!expected)
-               conntrack->helper = ip_ct_find_helper(&repl_tuple);
+       if (expected) {
+               /* If master is not in hash table yet (ie. packet hasn't left
+                  this machine yet), how can other end know about expected?
+                  Hence these are not the droids you are looking for (if
+                  master ct never got confirmed, we'd hold a reference to it
+                  and weird things would happen to future packets). */
+               if (!is_confirmed(expected->expectant)) {
+                       conntrack->helper = ip_ct_find_helper(&repl_tuple);
+                       goto end;
+               }
 
-       /* If the expectation is dying, then this is a loser. */
-       if (expected
-           && expected->expectant->helper->timeout
-           && ! del_timer(&expected->timeout))
-               expected = NULL;
+               /* Expectation is dying... */
+               if (expected->expectant->helper->timeout
+                   && !del_timer(&expected->timeout))
+                       goto end;       
 
-       if (expected) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                        conntrack, expected);
                /* Welcome, Mr. Bond.  We've been expecting you... */
+               IP_NF_ASSERT(expected->expectant);
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = expected;
                expected->sibling = conntrack;
                LIST_DELETE(&ip_conntrack_expect_list, expected);
                expected->expectant->expecting--;
-               nf_conntrack_get(&master_ct(conntrack)->infos[0]);
+               nf_conntrack_get(&master_ct(conntrack)->ct_general);
+
+               /* this is a braindead... --pablo */
+               atomic_inc(&ip_conntrack_count);
+               WRITE_UNLOCK(&ip_conntrack_lock);
+
+               if (expected->expectfn)
+                       expected->expectfn(conntrack);
+       
+               CONNTRACK_STAT_INC(expect_new);
+
+               goto ret;
+       } else  {
+               conntrack->helper = ip_ct_find_helper(&repl_tuple);
+
+               CONNTRACK_STAT_INC(new);
        }
-       atomic_inc(&ip_conntrack_count);
+
+end:   atomic_inc(&ip_conntrack_count);
        WRITE_UNLOCK(&ip_conntrack_lock);
 
-       if (expected && expected->expectfn)
-               expected->expectfn(conntrack);
-       return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+ret:   return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
 }
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -743,7 +634,8 @@ resolve_normal_ct(struct sk_buff *skb,
 
        IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
 
-       if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
+       if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, 
+                               &tuple,proto))
                return NULL;
 
        /* look for tuple match */
@@ -778,7 +670,8 @@ resolve_normal_ct(struct sk_buff *skb,
                }
                *set_reply = 0;
        }
-       skb->nfct = &h->ctrack->infos[*ctinfo];
+       skb->nfct = &h->ctrack->ct_general;
+       skb->nfctinfo = *ctinfo;
        return h->ctrack;
 }
 
@@ -795,6 +688,12 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
        int set_reply;
        int ret;
 
+       /* Previously seen (loopback or untracked)?  Ignore. */
+       if ((*pskb)->nfct) {
+               CONNTRACK_STAT_INC(ignore);
+               return NF_ACCEPT;
+       }
+
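
/*
 * Illustrative sketch, not part of this patch: "previously seen" covers
 * loopback traffic as well as packets deliberately exempted from tracking.
 * Under the new model a target that wants to opt a packet out would attach
 * the static ip_conntrack_untracked entry, roughly:
 */
static void example_mark_untracked(struct sk_buff *skb)
{
	skb->nfct = &ip_conntrack_untracked.ct_general;
	skb->nfctinfo = IP_CT_NEW;
	nf_conntrack_get(skb->nfct);
}
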
        /* Never happen */
        if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
                if (net_ratelimit()) {
@@ -822,39 +721,47 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
        }
 #endif
 
-       /* Previously seen (loopback or untracked)?  Ignore. */
-       if ((*pskb)->nfct)
-               return NF_ACCEPT;
-
        proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
 
-       /* It may be an icmp error... */
-       if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP 
-           && icmp_error_track(*pskb, &ctinfo, hooknum))
-               return NF_ACCEPT;
+       /* It may be a special packet, error, unclean...
+        * the inverse of the return code tells the netfilter
+        * core what to do with the packet. */
+       if (proto->error != NULL 
+           && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
+               CONNTRACK_STAT_INC(error);
+               CONNTRACK_STAT_INC(invalid);
+               return -ret;
+       }
 
-       if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
+       if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
                /* Not valid part of a connection */
+               CONNTRACK_STAT_INC(invalid);
                return NF_ACCEPT;
+       }
 
-       if (IS_ERR(ct))
+       if (IS_ERR(ct)) {
                /* Too stressed to deal. */
+               CONNTRACK_STAT_INC(drop);
                return NF_DROP;
+       }
 
        IP_NF_ASSERT((*pskb)->nfct);
 
        ret = proto->packet(ct, *pskb, ctinfo);
-       if (ret == -1) {
-               /* Invalid */
+       if (ret < 0) {
+               /* Invalid: the inverse of the return code tells
+                * the netfilter core what to do */
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
-               return NF_ACCEPT;
+               CONNTRACK_STAT_INC(invalid);
+               return -ret;
        }
 
        if (ret != NF_DROP && ct->helper) {
                ret = ct->helper->help(*pskb, ct, ctinfo);
                if (ret == -1) {
                        /* Invalid */
+                       CONNTRACK_STAT_INC(invalid);
                        nf_conntrack_put((*pskb)->nfct);
                        (*pskb)->nfct = NULL;
                        return NF_ACCEPT;
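
/*
 * Illustrative sketch, not part of this patch: the removed
 * icmp_error_track() is subsumed by the optional per-protocol ->error()
 * hook used above.  Its contract is that a return value <= 0 is the
 * negated netfilter verdict; a hypothetical handler might look like:
 */
static int example_error(struct sk_buff *skb,
			 enum ip_conntrack_info *ctinfo,
			 unsigned int hooknum)
{
	/* malformed: stop tracking and let the core return NF_ACCEPT
	   (the core negates the value, so -NF_ACCEPT becomes NF_ACCEPT) */
	if (skb->len < sizeof(struct iphdr))
		return -NF_ACCEPT;

	/* ... protocol-specific sanity checks / related-connection lookup ... */

	return NF_ACCEPT;	/* positive: fall through to normal tracking */
}
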
@@ -869,7 +776,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
                   const struct ip_conntrack_tuple *orig)
 {
-       return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
+       return ip_ct_invert_tuple(inverse, orig, 
+                                 ip_ct_find_proto(orig->dst.protonum));
 }
 
 static inline int resent_expect(const struct ip_conntrack_expect *i,
@@ -923,9 +831,8 @@ struct ip_conntrack_expect *
 ip_conntrack_expect_alloc(void)
 {
        struct ip_conntrack_expect *new;
-       
-       new = (struct ip_conntrack_expect *)
-               kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
+
+       new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
@@ -933,6 +840,7 @@ ip_conntrack_expect_alloc(void)
 
        /* tuple_cmp compares whole union, we have to initialized cleanly */
        memset(new, 0, sizeof(struct ip_conntrack_expect));
+       atomic_set(&new->use, 1);
 
        return new;
 }
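
/*
 * Illustrative sketch, not part of this patch: expectations now come from a
 * dedicated slab and leave the allocator with a use count of 1.  A helper
 * module would typically do something along these lines (the tuple/mask
 * setup is elided):
 */
static int example_expect(struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *exp;

	exp = ip_conntrack_expect_alloc();
	if (!exp)
		return -ENOMEM;		/* GFP_ATOMIC allocation failed */

	/* fill in exp->tuple, exp->mask and optionally exp->expectfn here;
	   on failure ip_conntrack_expect_related() frees exp back into
	   ip_conntrack_expect_cachep itself, as the hunks below show */
	return ip_conntrack_expect_related(exp, ct);
}
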
@@ -944,7 +852,6 @@ ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
        DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
        new->expectant = related_to;
        new->sibling = NULL;
-       atomic_set(&new->use, 1);
 
        /* add to expected list for this connection */
        list_add_tail(&new->expected_list, &related_to->sibling_list);
@@ -997,7 +904,8 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
                }
 
                WRITE_UNLOCK(&ip_conntrack_lock);
-               kfree(expect);
+               /* This expectation is not inserted so no need to lock */
+               kmem_cache_free(ip_conntrack_expect_cachep, expect);
                return -EEXIST;
 
        } else if (related_to->helper->max_expected && 
@@ -1015,7 +923,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
                                       related_to->helper->name,
                                       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
                                       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
-                       kfree(expect);
+                       kmem_cache_free(ip_conntrack_expect_cachep, expect);
                        return -EPERM;
                }
                DEBUGP("ip_conntrack: max number of expected "
@@ -1049,7 +957,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
                WRITE_UNLOCK(&ip_conntrack_lock);
                DEBUGP("expect_related: busy!\n");
 
-               kfree(expect);
+               kmem_cache_free(ip_conntrack_expect_cachep, expect);
                return -EBUSY;
        }
 
@@ -1057,6 +965,8 @@ out:       ip_conntrack_expect_insert(expect, related_to);
 
        WRITE_UNLOCK(&ip_conntrack_lock);
 
+       CONNTRACK_STAT_INC(expect_create);
+
        return ret;
 }
 
@@ -1164,25 +1074,45 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
        synchronize_net();
 }
 
-/* Refresh conntrack for this many jiffies. */
-void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
+static inline void ct_add_counters(struct ip_conntrack *ct,
+                                  enum ip_conntrack_info ctinfo,
+                                  const struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_NF_CT_ACCT
+       if (skb) {
+               ct->counters[CTINFO2DIR(ctinfo)].packets++;
+               ct->counters[CTINFO2DIR(ctinfo)].bytes += 
+                                       ntohs(skb->nh.iph->tot_len);
+       }
+#endif
+}
+
+/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
+void ip_ct_refresh_acct(struct ip_conntrack *ct, 
+                       enum ip_conntrack_info ctinfo,
+                       const struct sk_buff *skb,
+                       unsigned long extra_jiffies)
 {
        IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
 
        /* If not in hash table, timer will not be active yet */
-       if (!is_confirmed(ct))
+       if (!is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
-       else {
+               ct_add_counters(ct, ctinfo, skb);
+       } else {
                WRITE_LOCK(&ip_conntrack_lock);
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                }
+               ct_add_counters(ct, ctinfo, skb);
                WRITE_UNLOCK(&ip_conntrack_lock);
        }
 }
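
/*
 * Illustrative sketch, not part of this patch: callers that used to call
 * ip_ct_refresh(ct, timeout) now pass the ctinfo and the skb as well, so
 * byte/packet accounting can piggy-back on the timer refresh.  A protocol
 * tracker's ->packet() handler would use it roughly like this (the timeout
 * value is a placeholder):
 */
static int example_packet(struct ip_conntrack *ct,
			  const struct sk_buff *skb,
			  enum ip_conntrack_info ctinfo)
{
	/* refresh the timer and, with CONFIG_IP_NF_CT_ACCT, bump the
	   per-direction counters for this skb */
	ip_ct_refresh_acct(ct, ctinfo, skb, 30 * HZ);
	return NF_ACCEPT;
}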
 
+int ip_ct_no_defrag;
+
 /* Returns new sk_buff, or NULL */
 struct sk_buff *
 ip_ct_gather_frags(struct sk_buff *skb)
@@ -1191,6 +1121,12 @@ ip_ct_gather_frags(struct sk_buff *skb)
 #ifdef CONFIG_NETFILTER_DEBUG
        unsigned int olddebug = skb->nf_debug;
 #endif
+
+       if (unlikely(ip_ct_no_defrag)) {
+               kfree_skb(skb);
+               return NULL;
+       }
+
        if (sk) {
                sock_hold(sk);
                skb_orphan(skb);
@@ -1221,23 +1157,23 @@ ip_ct_gather_frags(struct sk_buff *skb)
 }
 
 /* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
+static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 {
        struct ip_conntrack *ct;
        enum ip_conntrack_info ctinfo;
 
-       ct = __ip_conntrack_get(nfct, &ctinfo);
-
-       /* This ICMP is in reverse direction to the packet which
-           caused it */
+       /* This ICMP is in reverse direction to the packet which caused it */
+       ct = ip_conntrack_get(skb, &ctinfo);
+       
        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;
 
-       /* Attach new skbuff, and increment count */
-       nskb->nfct = &ct->infos[ctinfo];
-       atomic_inc(&ct->ct_general.use);
+       /* Attach to new skbuff, and increment count */
+       nskb->nfct = &ct->ct_general;
+       nskb->nfctinfo = ctinfo;
+       nf_conntrack_get(nskb->nfct);
 }
 
 static inline int
@@ -1368,12 +1304,13 @@ void ip_conntrack_cleanup(void)
        }
 
        kmem_cache_destroy(ip_conntrack_cachep);
+       kmem_cache_destroy(ip_conntrack_expect_cachep);
        vfree(ip_conntrack_hash);
        nf_unregister_sockopt(&so_getorigdst);
 }
 
 static int hashsize;
-MODULE_PARM(hashsize, "i");
+module_param(hashsize, int, 0400);
 
 int __init ip_conntrack_init(void)
 {
@@ -1420,12 +1357,23 @@ int __init ip_conntrack_init(void)
                printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
                goto err_free_hash;
        }
+
+       ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
+                                       sizeof(struct ip_conntrack_expect),
+                                       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!ip_conntrack_expect_cachep) {
+               printk(KERN_ERR "Unable to create ip_expect slab cache\n");
+               goto err_free_conntrack_slab;
+       }
+
        /* Don't NEED lock here, but good form anyway. */
        WRITE_LOCK(&ip_conntrack_lock);
+       for (i = 0; i < MAX_IP_CT_PROTO; i++)
+               ip_ct_protos[i] = &ip_conntrack_generic_protocol;
        /* Sew in builtin protocols. */
-       list_append(&protocol_list, &ip_conntrack_protocol_tcp);
-       list_append(&protocol_list, &ip_conntrack_protocol_udp);
-       list_append(&protocol_list, &ip_conntrack_protocol_icmp);
+       ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+       ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+       ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
        WRITE_UNLOCK(&ip_conntrack_lock);
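
/*
 * Illustrative sketch, not part of this patch: with every slot of
 * ip_ct_protos[] pre-filled with the generic tracker, runtime registration
 * of a protocol module presumably becomes an array store under the write
 * lock, along these lines:
 */
int example_protocol_register(struct ip_conntrack_protocol *proto)
{
	int ret = 0;

	WRITE_LOCK(&ip_conntrack_lock);
	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol)
		ret = -EBUSY;	/* slot already claimed by another tracker */
	else
		ip_ct_protos[proto->proto] = proto;
	WRITE_UNLOCK(&ip_conntrack_lock);

	return ret;
}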
 
        for (i = 0; i < ip_conntrack_htable_size; i++)
@@ -1439,14 +1387,11 @@ int __init ip_conntrack_init(void)
        atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
        /*  - and look it like as a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-       /*  - and prepare the ctinfo field for REJECT & NAT. */
-       ip_conntrack_untracked.infos[IP_CT_NEW].master =
-       ip_conntrack_untracked.infos[IP_CT_RELATED].master =
-       ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master = 
-                       &ip_conntrack_untracked.ct_general;
 
        return ret;
 
+err_free_conntrack_slab:
+       kmem_cache_destroy(ip_conntrack_cachep);
 err_free_hash:
        vfree(ip_conntrack_hash);
 err_unreg_sockopt: