X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_conntrack_core.c;h=67caf5f43daf6e37e3c7db03e2949c957b96f8c8;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=00a89f4f8d8b5580a838fb9b3a4b38f00edef8d7;hpb=a2c21200f1c81b08cb55e417b68150bba439b646;p=linux-2.6.git diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 00a89f4f8..67caf5f43 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -34,8 +34,9 @@ #include #include #include -/* For ERR_PTR(). Yeah, I know... --RR */ -#include +#include +#include +#include /* This rwlock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -59,56 +60,29 @@ DECLARE_RWLOCK(ip_conntrack_lock); DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); +/* ip_conntrack_standalone needs this */ +atomic_t ip_conntrack_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(ip_conntrack_count); + void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -LIST_HEAD(protocol_list); +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size = 0; int ip_conntrack_max; -static atomic_t ip_conntrack_count = ATOMIC_INIT(0); struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep; +static kmem_cache_t *ip_conntrack_expect_cachep; struct ip_conntrack ip_conntrack_untracked; +unsigned int ip_ct_log_invalid; -extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; - -static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, - u_int8_t protocol) -{ - return protocol == curr->proto; -} - -struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol) -{ - struct ip_conntrack_protocol *p; - - MUST_BE_READ_LOCKED(&ip_conntrack_lock); - p = LIST_FIND(&protocol_list, proto_cmpfn, - struct ip_conntrack_protocol *, protocol); - if (!p) - p = &ip_conntrack_generic_protocol; - - return p; -} - -struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) -{ - struct ip_conntrack_protocol *p; - - READ_LOCK(&ip_conntrack_lock); - p = __ip_ct_find_proto(protocol); - READ_UNLOCK(&ip_conntrack_lock); - return p; -} +DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); inline void ip_conntrack_put(struct ip_conntrack *ct) { IP_NF_ASSERT(ct); - IP_NF_ASSERT(ct->infos[0].master); - /* nf_conntrack_put wants to go via an info struct, so feed it - one at random. */ - nf_conntrack_put(&ct->infos[0]); + nf_conntrack_put(&ct->ct_general); } static int ip_conntrack_hash_rnd_initted; @@ -127,11 +101,11 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple) } int -get_tuple(const struct iphdr *iph, - const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_protocol *protocol) +ip_ct_get_tuple(const struct iphdr *iph, + const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_protocol *protocol) { /* Never happen */ if (iph->frag_off & htons(IP_OFFSET)) { @@ -147,10 +121,10 @@ get_tuple(const struct iphdr *iph, return protocol->pkt_to_tuple(skb, dataoff, tuple); } -static int -invert_tuple(struct ip_conntrack_tuple *inverse, - const struct ip_conntrack_tuple *orig, - const struct ip_conntrack_protocol *protocol) +int +ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) { inverse->src.ip = orig->dst.ip; inverse->dst.ip = orig->src.ip; @@ -177,7 +151,8 @@ destroy_expect(struct ip_conntrack_expect *exp) IP_NF_ASSERT(atomic_read(&exp->use) == 0); IP_NF_ASSERT(!timer_pending(&exp->timeout)); - kfree(exp); + kmem_cache_free(ip_conntrack_expect_cachep, exp); + CONNTRACK_STAT_INC(expect_delete); } inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) @@ -336,7 +311,7 @@ destroy_conntrack(struct nf_conntrack *nfct) list_del(&ct->master->expected_list); master = ct->master->expectant; } - kfree(ct->master); + kmem_cache_free(ip_conntrack_expect_cachep, ct->master); } WRITE_UNLOCK(&ip_conntrack_lock); @@ -346,12 +321,15 @@ destroy_conntrack(struct nf_conntrack *nfct) DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); kmem_cache_free(ip_conntrack_cachep, ct); atomic_dec(&ip_conntrack_count); + CONNTRACK_STAT_INC(delete); } static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; + CONNTRACK_STAT_INC(delete_list); + WRITE_LOCK(&ip_conntrack_lock); clean_from_lists(ct); WRITE_UNLOCK(&ip_conntrack_lock); @@ -374,13 +352,19 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, { struct ip_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); + /* use per_cpu() to avoid multiple calls to smp_processor_id() */ + unsigned int cpu = smp_processor_id(); MUST_BE_READ_LOCKED(&ip_conntrack_lock); - h = LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - tuple, ignored_conntrack); - return h; + list_for_each_entry(h, &ip_conntrack_hash[hash], list) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + per_cpu(ip_conntrack_stat, cpu).found++; + return h; + } + per_cpu(ip_conntrack_stat, cpu).searched++; + } + + return NULL; } /* Find a connection corresponding to a tuple. */ @@ -399,36 +383,15 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, return h; } -static inline struct ip_conntrack * -__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo) -{ - struct ip_conntrack *ct - = (struct ip_conntrack *)nfct->master; - - /* ctinfo is the index of the nfct inside the conntrack */ - *ctinfo = nfct - ct->infos; - IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER); - return ct; -} - -/* Return conntrack and conntrack_info given skb->nfct->master */ -struct ip_conntrack * -ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) -{ - if (skb->nfct) - return __ip_conntrack_get(skb->nfct, ctinfo); - return NULL; -} - -/* Confirm a connection given skb->nfct; places it in hash table */ +/* Confirm a connection given skb; places it in hash table */ int -__ip_conntrack_confirm(struct nf_ct_info *nfct) +__ip_conntrack_confirm(struct sk_buff *skb) { unsigned int hash, repl_hash; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - ct = __ip_conntrack_get(nfct, &ctinfo); + ct = ip_conntrack_get(skb, &ctinfo); /* ipt_REJECT uses ip_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -474,10 +437,12 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct) atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); WRITE_UNLOCK(&ip_conntrack_lock); + CONNTRACK_STAT_INC(insert); return NF_ACCEPT; } WRITE_UNLOCK(&ip_conntrack_lock); + CONNTRACK_STAT_INC(insert_failed); return NF_DROP; } @@ -496,83 +461,6 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, return h != NULL; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -struct ip_conntrack * -icmp_error_track(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct ip_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } inside; - struct ip_conntrack_protocol *innerproto; - struct ip_conntrack_tuple_hash *h; - int dataoff; - - IP_NF_ASSERT(skb->nfct == NULL); - - /* Not enough header? */ - if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0) - return NULL; - - if (inside.icmp.type != ICMP_DEST_UNREACH - && inside.icmp.type != ICMP_SOURCE_QUENCH - && inside.icmp.type != ICMP_TIME_EXCEEDED - && inside.icmp.type != ICMP_PARAMETERPROB - && inside.icmp.type != ICMP_REDIRECT) - return NULL; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside.ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_track: fragment of proto %u\n", - inside.ip.protocol); - return NULL; - } - - innerproto = ip_ct_find_proto(inside.ip.protocol); - dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4; - /* Are they talking about one of our connections? */ - if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) { - DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol); - return NULL; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!invert_tuple(&innertuple, &origtuple, innerproto)) { - DEBUGP("icmp_error_track: Can't invert tuple\n"); - return NULL; - } - - *ctinfo = IP_CT_RELATED; - - h = ip_conntrack_find_get(&innertuple, NULL); - if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = ip_conntrack_find_get(&origtuple, NULL); - - if (!h) { - DEBUGP("icmp_error_track: no match\n"); - return NULL; - } - /* Reverse direction from that found */ - if (DIRECTION(h) != IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - skb->nfct = &h->ctrack->infos[*ctinfo]; - return h->ctrack; -} - /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ static inline int unreplied(const struct ip_conntrack_tuple_hash *i) @@ -598,6 +486,7 @@ static int early_drop(struct list_head *chain) if (del_timer(&h->ctrack->timeout)) { death_by_timeout((unsigned long)h->ctrack); dropped = 1; + CONNTRACK_STAT_INC(early_drop); } ip_conntrack_put(h->ctrack); return dropped; @@ -627,8 +516,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_tuple repl_tuple; size_t hash; struct ip_conntrack_expect *expected; - int i; - static unsigned int drop_next; if (!ip_conntrack_hash_rnd_initted) { get_random_bytes(&ip_conntrack_hash_rnd, 4); @@ -637,15 +524,10 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, hash = hash_conntrack(tuple); - if (ip_conntrack_max && - atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { - /* Try dropping from random chain, or else from the - chain about to put into (in case they're trying to - bomb one hash chain). */ - unsigned int next = (drop_next++)%ip_conntrack_htable_size; - - if (!early_drop(&ip_conntrack_hash[next]) - && !early_drop(&ip_conntrack_hash[hash])) { + if (ip_conntrack_max + && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + /* Try dropping from this hash chain. */ + if (!early_drop(&ip_conntrack_hash[hash])) { if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -654,7 +536,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, } } - if (!invert_tuple(&repl_tuple, tuple, protocol)) { + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { DEBUGP("Can't invert tuple.\n"); return NULL; } @@ -672,9 +554,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; - for (i=0; i < IP_CT_NUMBER; i++) - conntrack->infos[i].master = &conntrack->ct_general; - if (!protocol->new(conntrack, skb)) { kmem_cache_free(ip_conntrack_cachep, conntrack); return NULL; @@ -693,41 +572,53 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_expect *, tuple); READ_UNLOCK(&ip_conntrack_expect_tuple_lock); - /* If master is not in hash table yet (ie. packet hasn't left - this machine yet), how can other end know about expected? - Hence these are not the droids you are looking for (if - master ct never got confirmed, we'd hold a reference to it - and weird things would happen to future packets). */ - if (expected && !is_confirmed(expected->expectant)) - expected = NULL; - - /* Look up the conntrack helper for master connections only */ - if (!expected) - conntrack->helper = ip_ct_find_helper(&repl_tuple); + if (expected) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (!is_confirmed(expected->expectant)) { + conntrack->helper = ip_ct_find_helper(&repl_tuple); + goto end; + } - /* If the expectation is dying, then this is a loser. */ - if (expected - && expected->expectant->helper->timeout - && ! del_timer(&expected->timeout)) - expected = NULL; + /* Expectation is dying... */ + if (expected->expectant->helper->timeout + && !del_timer(&expected->timeout)) + goto end; - if (expected) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, expected); /* Welcome, Mr. Bond. We've been expecting you... */ + IP_NF_ASSERT(expected->expectant); __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = expected; expected->sibling = conntrack; LIST_DELETE(&ip_conntrack_expect_list, expected); expected->expectant->expecting--; - nf_conntrack_get(&master_ct(conntrack)->infos[0]); + nf_conntrack_get(&master_ct(conntrack)->ct_general); + + /* this is a braindead... --pablo */ + atomic_inc(&ip_conntrack_count); + WRITE_UNLOCK(&ip_conntrack_lock); + + if (expected->expectfn) + expected->expectfn(conntrack); + + CONNTRACK_STAT_INC(expect_new); + + goto ret; + } else { + conntrack->helper = ip_ct_find_helper(&repl_tuple); + + CONNTRACK_STAT_INC(new); } - atomic_inc(&ip_conntrack_count); + +end: atomic_inc(&ip_conntrack_count); WRITE_UNLOCK(&ip_conntrack_lock); - if (expected && expected->expectfn) - expected->expectfn(conntrack); - return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; +ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ @@ -743,7 +634,8 @@ resolve_normal_ct(struct sk_buff *skb, IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); - if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto)) + if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, + &tuple,proto)) return NULL; /* look for tuple match */ @@ -778,7 +670,8 @@ resolve_normal_ct(struct sk_buff *skb, } *set_reply = 0; } - skb->nfct = &h->ctrack->infos[*ctinfo]; + skb->nfct = &h->ctrack->ct_general; + skb->nfctinfo = *ctinfo; return h->ctrack; } @@ -795,6 +688,12 @@ unsigned int ip_conntrack_in(unsigned int hooknum, int set_reply; int ret; + /* Previously seen (loopback or untracked)? Ignore. */ + if ((*pskb)->nfct) { + CONNTRACK_STAT_INC(ignore); + return NF_ACCEPT; + } + /* Never happen */ if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { if (net_ratelimit()) { @@ -822,39 +721,47 @@ unsigned int ip_conntrack_in(unsigned int hooknum, } #endif - /* Previously seen (loopback or untracked)? Ignore. */ - if ((*pskb)->nfct) - return NF_ACCEPT; - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); - /* It may be an icmp error... */ - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP - && icmp_error_track(*pskb, &ctinfo, hooknum)) - return NF_ACCEPT; + /* It may be an special packet, error, unclean... + * inverse of the return code tells to the netfilter + * core what to do with the packet. */ + if (proto->error != NULL + && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { + CONNTRACK_STAT_INC(error); + CONNTRACK_STAT_INC(invalid); + return -ret; + } - if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) + if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { /* Not valid part of a connection */ + CONNTRACK_STAT_INC(invalid); return NF_ACCEPT; + } - if (IS_ERR(ct)) + if (IS_ERR(ct)) { /* Too stressed to deal. */ + CONNTRACK_STAT_INC(drop); return NF_DROP; + } IP_NF_ASSERT((*pskb)->nfct); ret = proto->packet(ct, *pskb, ctinfo); - if (ret == -1) { - /* Invalid */ + if (ret < 0) { + /* Invalid: inverse of the return code tells + * the netfilter core what to do*/ nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; - return NF_ACCEPT; + CONNTRACK_STAT_INC(invalid); + return -ret; } if (ret != NF_DROP && ct->helper) { ret = ct->helper->help(*pskb, ct, ctinfo); if (ret == -1) { /* Invalid */ + CONNTRACK_STAT_INC(invalid); nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; return NF_ACCEPT; @@ -869,7 +776,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { - return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum)); + return ip_ct_invert_tuple(inverse, orig, + ip_ct_find_proto(orig->dst.protonum)); } static inline int resent_expect(const struct ip_conntrack_expect *i, @@ -923,9 +831,8 @@ struct ip_conntrack_expect * ip_conntrack_expect_alloc(void) { struct ip_conntrack_expect *new; - - new = (struct ip_conntrack_expect *) - kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); + + new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC); if (!new) { DEBUGP("expect_related: OOM allocating expect\n"); return NULL; @@ -933,6 +840,7 @@ ip_conntrack_expect_alloc(void) /* tuple_cmp compares whole union, we have to initialized cleanly */ memset(new, 0, sizeof(struct ip_conntrack_expect)); + atomic_set(&new->use, 1); return new; } @@ -944,7 +852,6 @@ ip_conntrack_expect_insert(struct ip_conntrack_expect *new, DEBUGP("new expectation %p of conntrack %p\n", new, related_to); new->expectant = related_to; new->sibling = NULL; - atomic_set(&new->use, 1); /* add to expected list for this connection */ list_add_tail(&new->expected_list, &related_to->sibling_list); @@ -997,7 +904,8 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, } WRITE_UNLOCK(&ip_conntrack_lock); - kfree(expect); + /* This expectation is not inserted so no need to lock */ + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EEXIST; } else if (related_to->helper->max_expected && @@ -1015,7 +923,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, related_to->helper->name, NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); - kfree(expect); + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EPERM; } DEBUGP("ip_conntrack: max number of expected " @@ -1049,7 +957,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, WRITE_UNLOCK(&ip_conntrack_lock); DEBUGP("expect_related: busy!\n"); - kfree(expect); + kmem_cache_free(ip_conntrack_expect_cachep, expect); return -EBUSY; } @@ -1057,6 +965,8 @@ out: ip_conntrack_expect_insert(expect, related_to); WRITE_UNLOCK(&ip_conntrack_lock); + CONNTRACK_STAT_INC(expect_create); + return ret; } @@ -1164,25 +1074,45 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -/* Refresh conntrack for this many jiffies. */ -void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +static inline void ct_add_counters(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) +{ +#ifdef CONFIG_IP_NF_CT_ACCT + if (skb) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif +} + +/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ +void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) { IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); /* If not in hash table, timer will not be active yet */ - if (!is_confirmed(ct)) + if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - else { + ct_add_counters(ct, ctinfo, skb); + } else { WRITE_LOCK(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); } + ct_add_counters(ct, ctinfo, skb); WRITE_UNLOCK(&ip_conntrack_lock); } } +int ip_ct_no_defrag; + /* Returns new sk_buff, or NULL */ struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb) @@ -1191,6 +1121,12 @@ ip_ct_gather_frags(struct sk_buff *skb) #ifdef CONFIG_NETFILTER_DEBUG unsigned int olddebug = skb->nf_debug; #endif + + if (unlikely(ip_ct_no_defrag)) { + kfree_skb(skb); + return NULL; + } + if (sk) { sock_hold(sk); skb_orphan(skb); @@ -1221,23 +1157,23 @@ ip_ct_gather_frags(struct sk_buff *skb) } /* Used by ipt_REJECT. */ -static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct) +static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - ct = __ip_conntrack_get(nfct, &ctinfo); - - /* This ICMP is in reverse direction to the packet which - caused it */ + /* This ICMP is in reverse direction to the packet which caused it */ + ct = ip_conntrack_get(skb, &ctinfo); + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; else ctinfo = IP_CT_RELATED; - /* Attach new skbuff, and increment count */ - nskb->nfct = &ct->infos[ctinfo]; - atomic_inc(&ct->ct_general.use); + /* Attach to new skbuff, and increment count */ + nskb->nfct = &ct->ct_general; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nskb->nfct); } static inline int @@ -1368,12 +1304,13 @@ void ip_conntrack_cleanup(void) } kmem_cache_destroy(ip_conntrack_cachep); + kmem_cache_destroy(ip_conntrack_expect_cachep); vfree(ip_conntrack_hash); nf_unregister_sockopt(&so_getorigdst); } static int hashsize; -MODULE_PARM(hashsize, "i"); +module_param(hashsize, int, 0400); int __init ip_conntrack_init(void) { @@ -1420,12 +1357,23 @@ int __init ip_conntrack_init(void) printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); goto err_free_hash; } + + ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", + sizeof(struct ip_conntrack_expect), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ip_conntrack_expect_cachep) { + printk(KERN_ERR "Unable to create ip_expect slab cache\n"); + goto err_free_conntrack_slab; + } + /* Don't NEED lock here, but good form anyway. */ WRITE_LOCK(&ip_conntrack_lock); + for (i = 0; i < MAX_IP_CT_PROTO; i++) + ip_ct_protos[i] = &ip_conntrack_generic_protocol; /* Sew in builtin protocols. */ - list_append(&protocol_list, &ip_conntrack_protocol_tcp); - list_append(&protocol_list, &ip_conntrack_protocol_udp); - list_append(&protocol_list, &ip_conntrack_protocol_icmp); + ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; + ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; + ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; WRITE_UNLOCK(&ip_conntrack_lock); for (i = 0; i < ip_conntrack_htable_size; i++) @@ -1439,14 +1387,11 @@ int __init ip_conntrack_init(void) atomic_set(&ip_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); - /* - and prepare the ctinfo field for REJECT & NAT. */ - ip_conntrack_untracked.infos[IP_CT_NEW].master = - ip_conntrack_untracked.infos[IP_CT_RELATED].master = - ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master = - &ip_conntrack_untracked.ct_general; return ret; +err_free_conntrack_slab: + kmem_cache_destroy(ip_conntrack_cachep); err_free_hash: vfree(ip_conntrack_hash); err_unreg_sockopt: