X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fnetfilter%2Fnf_conntrack_core.c;h=d622ddf08bb05d2cc23a2f6c5672d66edadb23aa;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=f9b83f91371ac27ac2c127fea81ed0bbc6e4f0c5;hpb=f7ed79d23a47594e7834d66a8f14449796d4f3e6;p=linux-2.6.git diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f9b83f913..d622ddf08 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -3,7 +3,7 @@ extension. */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team + * (C) 2002-2005 Netfilter Core Team * (C) 2003,2004 USAGI/WIDE Project * * This program is free software; you can redistribute it and/or modify @@ -20,11 +20,6 @@ * - generalize L3 protocol denendent part. * 23 Mar 2004: Yasuyuki Kozakai @USAGI * - add support various size of conntrack structures. - * 26 Jan 2006: Harald Welte - * - restructure nf_conn (introduce nf_conn_help) - * - redesign 'features' how they were originally intended - * 26 Feb 2006: Pablo Neira Ayuso - * - add support for L3 protocol module load on demand. * * Derived from net/ipv4/netfilter/ip_conntrack_core.c */ @@ -60,7 +55,7 @@ #include #include -#define NF_CONNTRACK_VERSION "0.5.0" +#define NF_CONNTRACK_VERSION "0.4.1" #if 0 #define DEBUGP printk @@ -87,11 +82,11 @@ unsigned int nf_ct_log_invalid; static LIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc; -static unsigned int nf_conntrack_next_id; -static unsigned int nf_conntrack_expect_next_id; +static unsigned int nf_conntrack_next_id = 1; +static unsigned int nf_conntrack_expect_next_id = 1; #ifdef CONFIG_NF_CONNTRACK_EVENTS -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain); +struct notifier_block *nf_conntrack_chain; +struct notifier_block *nf_conntrack_expect_chain; DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); @@ -103,7 +98,7 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) DEBUGP("ecache: delivering events for %p\n", ecache->ct); if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) && ecache->events) - atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, + notifier_call_chain(&nf_conntrack_chain, ecache->events, ecache->ct); ecache->events = 0; @@ -146,7 +141,7 @@ static void nf_ct_event_cache_flush(void) struct nf_conntrack_ecache *ecache; int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu) { ecache = &per_cpu(nf_conntrack_ecache, cpu); if (ecache->ct) nf_ct_put(ecache->ct); @@ -178,13 +173,16 @@ static struct { /* allocated slab cache + modules which uses this slab cache */ int use; + /* Initialization */ + int (*init_conntrack)(struct nf_conn *, u_int32_t); + } nf_ct_cache[NF_CT_F_NUM]; /* protect members of nf_ct_cache except of "use" */ DEFINE_RWLOCK(nf_ct_cache_lock); /* This avoids calling kmem_cache_create() with same name simultaneously */ -static DEFINE_MUTEX(nf_ct_cache_mutex); +DECLARE_MUTEX(nf_ct_cache_mutex); extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; struct nf_conntrack_protocol * @@ -205,8 +203,10 @@ nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol) preempt_disable(); p = __nf_ct_proto_find(l3proto, protocol); - if (!try_module_get(p->me)) - p = &nf_conntrack_generic_protocol; + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_protocol; + } preempt_enable(); return p; @@ -224,8 +224,10 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) preempt_disable(); p = __nf_ct_l3proto_find(l3proto); - if (!try_module_get(p->me)) - p = &nf_conntrack_generic_l3proto; + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_l3proto; + } preempt_enable(); return p; @@ -236,35 +238,6 @@ void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) module_put(p->me); } -int -nf_ct_l3proto_try_module_get(unsigned short l3proto) -{ - int ret; - struct nf_conntrack_l3proto *p; - -retry: p = nf_ct_l3proto_find_get(l3proto); - if (p == &nf_conntrack_generic_l3proto) { - ret = request_module("nf_conntrack-%d", l3proto); - if (!ret) - goto retry; - - return -EPROTOTYPE; - } - - return 0; -} - -void nf_ct_l3proto_module_put(unsigned short l3proto) -{ - struct nf_conntrack_l3proto *p; - - preempt_disable(); - p = __nf_ct_l3proto_find(l3proto); - preempt_enable(); - - module_put(p->me); -} - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -286,8 +259,21 @@ static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) nf_conntrack_hash_rnd); } +/* Initialize "struct nf_conn" which has spaces for helper */ +static int +init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features) +{ + + conntrack->help = (union nf_conntrack_help *) + (((unsigned long)conntrack->data + + (__alignof__(union nf_conntrack_help) - 1)) + & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1)))); + return 0; +} + int nf_conntrack_register_cache(u_int32_t features, const char *name, - size_t size) + size_t size, + int (*init)(struct nf_conn *, u_int32_t)) { int ret = 0; char *cache_name; @@ -302,7 +288,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, return -EINVAL; } - mutex_lock(&nf_ct_cache_mutex); + down(&nf_ct_cache_mutex); write_lock_bh(&nf_ct_cache_lock); /* e.g: multiple helpers are loaded */ @@ -310,7 +296,8 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); if ((!strncmp(nf_ct_cache[features].name, name, NF_CT_FEATURES_NAMELEN)) - && nf_ct_cache[features].size == size) { + && nf_ct_cache[features].size == size + && nf_ct_cache[features].init_conntrack == init) { DEBUGP("nf_conntrack_register_cache: reusing.\n"); nf_ct_cache[features].use++; ret = 0; @@ -318,7 +305,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, ret = -EBUSY; write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); + up(&nf_ct_cache_mutex); return ret; } write_unlock_bh(&nf_ct_cache_lock); @@ -353,6 +340,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, write_lock_bh(&nf_ct_cache_lock); nf_ct_cache[features].use = 1; nf_ct_cache[features].size = size; + nf_ct_cache[features].init_conntrack = init; nf_ct_cache[features].cachep = cachep; nf_ct_cache[features].name = cache_name; write_unlock_bh(&nf_ct_cache_lock); @@ -362,7 +350,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, out_free_name: kfree(cache_name); out_up_mutex: - mutex_unlock(&nf_ct_cache_mutex); + up(&nf_ct_cache_mutex); return ret; } @@ -377,18 +365,19 @@ void nf_conntrack_unregister_cache(u_int32_t features) * slab cache. */ DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); - mutex_lock(&nf_ct_cache_mutex); + down(&nf_ct_cache_mutex); write_lock_bh(&nf_ct_cache_lock); if (--nf_ct_cache[features].use > 0) { write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); + up(&nf_ct_cache_mutex); return; } cachep = nf_ct_cache[features].cachep; name = nf_ct_cache[features].name; nf_ct_cache[features].cachep = NULL; nf_ct_cache[features].name = NULL; + nf_ct_cache[features].init_conntrack = NULL; nf_ct_cache[features].size = 0; write_unlock_bh(&nf_ct_cache_lock); @@ -397,7 +386,7 @@ void nf_conntrack_unregister_cache(u_int32_t features) kmem_cache_destroy(cachep); kfree(name); - mutex_unlock(&nf_ct_cache_mutex); + up(&nf_ct_cache_mutex); } int @@ -443,15 +432,11 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { - struct nf_conn_help *master_help = nfct_help(exp->master); - - NF_CT_ASSERT(master_help); ASSERT_WRITE_LOCK(&nf_conntrack_lock); NF_CT_ASSERT(!timer_pending(&exp->timeout)); - list_del(&exp->list); NF_CT_STAT_INC(expect_delete); - master_help->expecting--; + exp->master->expecting--; nf_conntrack_expect_put(exp); } @@ -523,10 +508,9 @@ find_expectation(const struct nf_conntrack_tuple *tuple) void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conntrack_expect *i, *tmp; - struct nf_conn_help *help = nfct_help(ct); /* Optimization: most connection never expect any others. */ - if (!help || help->expecting == 0) + if (ct->expecting == 0) return; list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { @@ -729,7 +713,6 @@ __nf_conntrack_confirm(struct sk_buff **pskb) conntrack_tuple_cmp, struct nf_conntrack_tuple_hash *, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - struct nf_conn_help *help; /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); @@ -743,8 +726,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) set_bit(IPS_CONFIRMED_BIT, &ct->status); NF_CT_STAT_INC(insert); write_unlock_bh(&nf_conntrack_lock); - help = nfct_help(ct); - if (help && help->helper) + if (ct->helper) nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || @@ -860,9 +842,8 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, { struct nf_conn *conntrack = NULL; u_int32_t features = 0; - struct nf_conntrack_helper *helper; - if (unlikely(!nf_conntrack_hash_rnd_initted)) { + if (!nf_conntrack_hash_rnd_initted) { get_random_bytes(&nf_conntrack_hash_rnd, 4); nf_conntrack_hash_rnd_initted = 1; } @@ -882,11 +863,8 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* find features needed by this conntrack. */ features = l3proto->get_features(orig); - - /* FIXME: protect helper list per RCU */ read_lock_bh(&nf_conntrack_lock); - helper = __nf_ct_helper_find(repl); - if (helper) + if (__nf_ct_helper_find(repl) != NULL) features |= NF_CT_F_HELP; read_unlock_bh(&nf_conntrack_lock); @@ -894,7 +872,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, read_lock_bh(&nf_ct_cache_lock); - if (unlikely(!nf_ct_cache[features].use)) { + if (!nf_ct_cache[features].use) { DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", features); goto out; @@ -908,10 +886,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, memset(conntrack, 0, nf_ct_cache[features].size); conntrack->features = features; - if (helper) { - struct nf_conn_help *help = nfct_help(conntrack); - NF_CT_ASSERT(help); - help->helper = helper; + if (nf_ct_cache[features].init_conntrack && + nf_ct_cache[features].init_conntrack(conntrack, features) < 0) { + DEBUGP("nf_conntrack_alloc: failed to init\n"); + kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + conntrack = NULL; + goto out; } atomic_set(&conntrack->ct_general.use, 1); @@ -992,8 +972,11 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, #endif nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); - } else + } else { + conntrack->helper = __nf_ct_helper_find(&repl_tuple); + NF_CT_STAT_INC(new); + } /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); @@ -1223,16 +1206,14 @@ void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) { - struct nf_conn_help *master_help = nfct_help(exp->master); - atomic_inc(&exp->use); - master_help->expecting++; + exp->master->expecting++; list_add(&exp->list, &nf_conntrack_expect_list); init_timer(&exp->timeout); exp->timeout.data = (unsigned long)exp; exp->timeout.function = expectation_timed_out; - exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; + exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); exp->id = ++nf_conntrack_expect_next_id; @@ -1258,12 +1239,10 @@ static void evict_oldest_expect(struct nf_conn *master) static inline int refresh_timer(struct nf_conntrack_expect *i) { - struct nf_conn_help *master_help = nfct_help(i->master); - if (!del_timer(&i->timeout)) return 0; - i->timeout.expires = jiffies + master_help->helper->timeout*HZ; + i->timeout.expires = jiffies + i->master->helper->timeout*HZ; add_timer(&i->timeout); return 1; } @@ -1272,11 +1251,8 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) { struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; - struct nf_conn_help *master_help = nfct_help(master); int ret; - NF_CT_ASSERT(master_help); - DEBUGP("nf_conntrack_expect_related %p\n", related_to); DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); @@ -1295,8 +1271,8 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) } } /* Will be over limit? */ - if (master_help->helper->max_expected && - master_help->expecting >= master_help->helper->max_expected) + if (master->helper->max_expected && + master->expecting >= master->helper->max_expected) evict_oldest_expect(master); nf_conntrack_expect_insert(expect); @@ -1307,6 +1283,24 @@ out: return ret; } +/* Alter reply tuple (maybe alter helper). This is for NAT, and is + implicitly racy: see __nf_conntrack_confirm */ +void nf_conntrack_alter_reply(struct nf_conn *conntrack, + const struct nf_conntrack_tuple *newreply) +{ + write_lock_bh(&nf_conntrack_lock); + /* Should be unconfirmed, so not in hash table yet */ + NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + NF_CT_DUMP_TUPLE(newreply); + + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master && conntrack->expecting == 0) + conntrack->helper = __nf_ct_helper_find(newreply); + write_unlock_bh(&nf_conntrack_lock); +} + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret; @@ -1314,8 +1308,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", sizeof(struct nf_conn) - + sizeof(struct nf_conn_help) - + __alignof__(struct nf_conn_help)); + + sizeof(union nf_conntrack_help) + + __alignof__(union nf_conntrack_help), + init_conntrack_for_helper); if (ret < 0) { printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); return ret; @@ -1343,12 +1338,9 @@ __nf_conntrack_helper_find_byname(const char *name) static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); - struct nf_conn_help *help = nfct_help(ct); - - if (help && help->helper == me) { - nf_conntrack_event(IPCT_HELPER, ct); - help->helper = NULL; + if (nf_ct_tuplehash_to_ctrack(i)->helper == me) { + nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i)); + nf_ct_tuplehash_to_ctrack(i)->helper = NULL; } return 0; } @@ -1364,8 +1356,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { - struct nf_conn_help *help = nfct_help(exp->master); - if (help->helper == me && del_timer(&exp->timeout)) { + if (exp->master->helper == me && del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_conntrack_expect_put(exp); } @@ -1432,8 +1423,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, #include #include -#include - /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be * in ip_conntrack_core, since we don't want the protocols to autoload @@ -1708,7 +1697,7 @@ int __init nf_conntrack_init(void) } ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", - sizeof(struct nf_conn)); + sizeof(struct nf_conn), NULL); if (ret < 0) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); goto err_free_hash;