vserver 1.9.5.x5
[linux-2.6.git] / net / sched / ipt.c
index 386d948..886f463 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <linux/kmod.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_ipt.h>
 static u32 idx_gen;
 static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
 /* ipt hash table lock */
-static rwlock_t ipt_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(ipt_lock);
 
 /* ovewrride the defaults */
-#define tcf_st  tcf_ipt
-#define tcf_t_lock   ipt_lock
-#define tcf_ht tcf_ipt_ht
+#define tcf_st         tcf_ipt
+#define tcf_t_lock     ipt_lock
+#define tcf_ht         tcf_ipt_ht
 
+#define CONFIG_NET_ACT_INIT
 #include <net/pkt_act.h>
 
-static inline int
-init_targ(struct tcf_ipt *p)
+static int
+ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
        struct ipt_target *target;
        int ret = 0;
-       struct ipt_entry_target *t = p->t;
-       target = __ipt_find_target_lock(t->u.user.name, &ret);
 
-       if (!target) {
-               printk("init_targ: Failed to find %s\n", t->u.user.name);
-               return -1;
-       }
+       target = ipt_find_target(t->u.user.name, t->u.user.revision);
+       if (!target)
+               return -ENOENT;
 
-       DPRINTK("init_targ: found %s\n", target->name);
-       /* we really need proper ref counting
-        seems to be only needed for modules?? Talk to laforge */
-/*      if (target->me)
-              __MOD_INC_USE_COUNT(target->me);
-*/
+       DPRINTK("ipt_init_target: found %s\n", target->name);
        t->u.kernel.target = target;
 
-       __ipt_mutex_up();
-
        if (t->u.kernel.target->checkentry
-           && !t->u.kernel.target->checkentry(p->tname, NULL, t->data,
-                                              t->u.target_size
-                                              - sizeof (*t), p->hook)) {
-/*              if (t->u.kernel.target->me)
-             __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
-*/
-               DPRINTK("ip_tables: check failed for `%s'.\n",
+           && !t->u.kernel.target->checkentry(table, NULL, t->data,
+                                              t->u.target_size - sizeof(*t),
+                                              hook)) {
+               DPRINTK("ipt_init_target: check failed for `%s'.\n",
                        t->u.kernel.target->name);
+               module_put(t->u.kernel.target->me);
                ret = -EINVAL;
        }
 
        return ret;
 }
 
+/*
+ * Release a target previously set up by ipt_init_target(): call the
+ * target's destroy hook, if it has one, on the target payload (the bytes
+ * following the ipt_entry_target header), then module_put() the module
+ * that owns the target.
+ */
+static void
+ipt_destroy_target(struct ipt_entry_target *t)
+{
+       if (t->u.kernel.target->destroy)
+               t->u.kernel.target->destroy(t->data,
+                                           t->u.target_size - sizeof(*t));
+       module_put(t->u.kernel.target->me);
+}
+
 static int
-tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind)
+tcf_ipt_release(struct tcf_ipt *p, int bind)
 {
-       struct ipt_entry_target *t;
-       unsigned h;
-       struct rtattr *tb[TCA_IPT_MAX];
-       struct tcf_ipt *p;
        int ret = 0;
-       u32 index = 0;
-       u32 hook = 0;
-
-       if (NULL == a || NULL == rta ||
-           (rtattr_parse(tb, TCA_IPT_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta)) <
-            0)) {
-               return -1;
-       }
-
-
-       if (tb[TCA_IPT_INDEX - 1]) {
-               index = *(u32 *) RTA_DATA(tb[TCA_IPT_INDEX - 1]);
-               DPRINTK("ipt index %d\n", index);
-       }
-
-       if (index && (p = tcf_hash_lookup(index)) != NULL) {
-               a->priv = (void *) p;
-               spin_lock(&p->lock);
-               if (bind) {
-                       p->bindcnt += 1;
-                       p->refcnt += 1;
+       if (p) {
+               if (bind)
+                       p->bindcnt--;
+               p->refcnt--;
+               if (p->bindcnt <= 0 && p->refcnt <= 0) {
+                       ipt_destroy_target(p->t);
+                       kfree(p->tname);
+                       kfree(p->t);
+                       tcf_hash_destroy(p);
+                       ret = ACT_P_DELETED;
                }
-               if (ovr) {
-                       goto override;
-               }
-               spin_unlock(&p->lock);
-               return ret;
        }
+       return ret;
+}
 
-       if (NULL == tb[TCA_IPT_TARG - 1] || NULL == tb[TCA_IPT_HOOK - 1]) {
-               return -1;
-       }
+/*
+ * Create a new ipt action, or (when ovr is set) replace the target of an
+ * existing one.
+ *
+ * rta:  nested attribute carrying TCA_IPT_HOOK and TCA_IPT_TARG (both
+ *       required) and optionally TCA_IPT_INDEX and TCA_IPT_TABLE
+ * est:  passed through to tcf_hash_create()
+ * a:    passed through to tcf_hash_check() / tcf_hash_create()
+ * ovr:  non-zero permits replacing an action that already exists
+ * bind: passed through to the hash helpers and tcf_ipt_release()
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an existing
+ * one was updated, or a negative errno (-EINVAL, -ENOMEM, -EEXIST, or the
+ * error returned by ipt_init_target()).
+ */
+static int
+tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+             int ovr, int bind)
+{
+       struct rtattr *tb[TCA_IPT_MAX];
+       struct tcf_ipt *p;
+       struct ipt_entry_target *td, *t;
+       char *tname;
+       int ret = 0, err;
+       u32 hook = 0;
+       u32 index = 0;
 
-       p = kmalloc(sizeof (*p), GFP_KERNEL);
-       if (p == NULL)
-               return -1;
-
-       memset(p, 0, sizeof (*p));
-       p->refcnt = 1;
-       ret = 1;
-       spin_lock_init(&p->lock);
-       p->stats_lock = &p->lock;
-       if (bind)
-               p->bindcnt = 1;
-
-override:
-       hook = *(u32 *) RTA_DATA(tb[TCA_IPT_HOOK - 1]);
-
-       t = (struct ipt_entry_target *) RTA_DATA(tb[TCA_IPT_TARG - 1]);
-
-       p->t = kmalloc(t->u.target_size, GFP_KERNEL);
-       if (p->t == NULL) {
-               if (ovr) {
-                       printk("ipt policy messed up \n");
-                       spin_unlock(&p->lock);
-                       return -1;
+       if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
+               return -EINVAL;
+
+       if (tb[TCA_IPT_HOOK-1] == NULL ||
+           RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
+               return -EINVAL;
+       if (tb[TCA_IPT_TARG-1] == NULL ||
+           RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
+               return -EINVAL;
+       td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);
+       /*
+        * target_size is read from the attribute payload.  It sizes the
+        * kmalloc()/memcpy() below and ipt_init_target() subtracts
+        * sizeof(*t) from it, so it must cover at least the header as
+        * well as fit inside the attribute.
+        */
+       if (td->u.target_size < sizeof(*td) ||
+           RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
+               return -EINVAL;
+
+       if (tb[TCA_IPT_INDEX-1] != NULL &&
+           RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
+               index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
+
+       p = tcf_hash_check(index, a, ovr, bind);
+       if (p == NULL) {
+               p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
+               if (p == NULL)
+                       return -ENOMEM;
+               ret = ACT_P_CREATED;
+       } else {
+               if (!ovr) {
+                       tcf_ipt_release(p, bind);
+                       return -EEXIST;
                }
-               kfree(p);
-               return -1;
        }
 
-       memcpy(p->t, RTA_DATA(tb[TCA_IPT_TARG - 1]), t->u.target_size);
-       DPRINTK(" target NAME %s size %d data[0] %x data[1] %x\n",
-               t->u.user.name, t->u.target_size, t->data[0], t->data[1]);
+       hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
 
-       p->tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+       err = -ENOMEM;
+       tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+       if (tname == NULL)
+               goto err1;
+       /* Fall back to "mangle" when no table is given or the name is too long. */
+       if (tb[TCA_IPT_TABLE - 1] == NULL ||
+           rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
+               strcpy(tname, "mangle");
 
-       if (p->tname == NULL) {
-               if (ovr) {
-                       printk("ipt policy messed up 2 \n");
-                       spin_unlock(&p->lock);
-                       return -1;
-               }
-               kfree(p->t);
-               kfree(p);
-               return -1;
-       } else {
-               int csize = IFNAMSIZ - 1;
-
-               memset(p->tname, 0, IFNAMSIZ);
-               if (tb[TCA_IPT_TABLE - 1]) {
-                       if (strlen((char *) RTA_DATA(tb[TCA_IPT_TABLE - 1])) <
-                           csize)
-                               csize = strlen(RTA_DATA(tb[TCA_IPT_TABLE - 1]));
-                       strncpy(p->tname, RTA_DATA(tb[TCA_IPT_TABLE - 1]),
-                               csize);
-                       DPRINTK("table name %s\n", p->tname);
-               } else {
-                       strncpy(p->tname, "mangle", 1 + strlen("mangle"));
-               }
-       }
+       t = kmalloc(td->u.target_size, GFP_KERNEL);
+       if (t == NULL)
+               goto err2;
+       memcpy(t, td, td->u.target_size);
 
-       if (0 > init_targ(p)) {
-               if (ovr) {
-                       printk("ipt policy messed up 2 \n");
-                       spin_unlock(&p->lock);
-                       return -1;
-               }
+       if ((err = ipt_init_target(t, tname, hook)) < 0)
+               goto err3;
+
+       spin_lock_bh(&p->lock);
+       if (ret != ACT_P_CREATED) {
+               /* Replacing: tear down what the existing action held. */
+               ipt_destroy_target(p->t);
                kfree(p->tname);
                kfree(p->t);
-               kfree(p);
-               return -1;
-       }
-
-       if (ovr) {
-               spin_unlock(&p->lock);
-               return -1;
        }
-
-       p->index = index ? : tcf_hash_new_index();
-
-       p->tm.lastuse = jiffies;
-       /*
-       p->tm.expires = jiffies;
-       */
-       p->tm.install = jiffies;
-#ifdef CONFIG_NET_ESTIMATOR
-       if (est)
-               gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
-#endif
-       h = tcf_hash(p->index);
-       write_lock_bh(&ipt_lock);
-       p->next = tcf_ipt_ht[h];
-       tcf_ipt_ht[h] = p;
-       write_unlock_bh(&ipt_lock);
-       a->priv = (void *) p;
+       p->tname = tname;
+       p->t     = t;
+       p->hook  = hook;
+       spin_unlock_bh(&p->lock);
+       if (ret == ACT_P_CREATED)
+               tcf_hash_insert(p);
        return ret;
 
+err3:
+       kfree(t);
+err2:
+       kfree(tname);
+err1:
+       /*
+        * Only free p if it was allocated above.  On the override path p
+        * is an existing action returned by tcf_hash_check() that still
+        * holds its previous target; freeing it here would leave it
+        * dangling.
+        * NOTE(review): whether anything tcf_hash_check() did for bind
+        * must also be undone here cannot be told from this file - confirm.
+        */
+       if (ret == ACT_P_CREATED)
+               kfree(p);
+       return err;
 }
 
 static int
 tcf_ipt_cleanup(struct tc_action *a, int bind)
 {
-       struct tcf_ipt *p;
-       p = PRIV(a,ipt);
-       if (NULL != p)
-               return tcf_hash_release(p, bind);
-       return 0;
+       struct tcf_ipt *p = PRIV(a, ipt);
+       return tcf_ipt_release(p, bind);
 }
 
 static int
 tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
 {
        int ret = 0, result = 0;
-       struct tcf_ipt *p;
+       struct tcf_ipt *p = PRIV(a, ipt);
        struct sk_buff *skb = *pskb;
 
-       p = PRIV(a,ipt);
-
-       if (NULL == p || NULL == skb) {
-               return -1;
+       if (skb_cloned(skb)) {
+               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+                       return TC_ACT_UNSPEC;
        }
 
        spin_lock(&p->lock);
@@ -259,17 +218,12 @@ tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
        p->bstats.bytes += skb->len;
        p->bstats.packets++;
 
-       if (skb_cloned(skb) ) {
-               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-                       return -1;
-               }
-       }
        /* yes, we have to worry about both in and out dev
         worry later - danger - this API seems to have changed
         from earlier kernels */
 
        ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL,
-                                           p->hook, p->t->data, (void *)NULL);
+                                           p->hook, p->t->data, NULL);
        switch (ret) {
        case NF_ACCEPT:
                result = TC_ACT_OK;
@@ -299,22 +253,15 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
        struct tcf_t tm;
        struct tc_cnt c;
        unsigned char *b = skb->tail;
+       struct tcf_ipt *p = PRIV(a, ipt);
 
-       struct tcf_ipt *p;
-
-       p = PRIV(a,ipt);
-       if (NULL == p) {
-               printk("BUG: tcf_ipt_dump called with NULL params\n");
-               goto rtattr_failure;
-       }
        /* for simple targets kernel size == user size
        ** user name = target name
        ** for foolproof you need to not assume this
        */
 
        t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
-
-       if (NULL == t)
+       if (t == NULL)
                goto rtattr_failure;
 
        c.bindcnt = p->bindcnt - bind;
@@ -324,10 +271,10 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 
        DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
                strlen(p->tname));
-       DPRINTK
-           ("\tdump target name %s size %d size user %d data[0] %x data[1] %x\n",
-            p->t->u.kernel.target->name, p->t->u.target_size, p->t->u.user.target_size,
-            p->t->data[0], p->t->data[1]);
+       DPRINTK("\tdump target name %s size %d size user %d "
+               "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
+               p->t->u.target_size, p->t->u.user.target_size,
+               p->t->data[0], p->t->data[1]);
        RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
        RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
        RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
@@ -345,7 +292,6 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 }
 
 static struct tc_action_ops act_ipt_ops = {
-       .next           =       NULL,
        .kind           =       "ipt",
        .type           =       TCA_ACT_IPT,
        .capab          =       TCA_CAP_NONE,