fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / sched / sch_api.c
index 1071471..65825f4 100644 (file)
@@ -15,7 +15,6 @@
  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -192,21 +191,27 @@ int unregister_qdisc(struct Qdisc_ops *qops)
    (root qdisc, all its children, children of children etc.)
  */
 
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
 {
        struct Qdisc *q;
 
-       read_lock_bh(&qdisc_tree_lock);
        list_for_each_entry(q, &dev->qdisc_list, list) {
-               if (q->handle == handle) {
-                       read_unlock_bh(&qdisc_tree_lock);
+               if (q->handle == handle)
                        return q;
-               }
        }
-       read_unlock_bh(&qdisc_tree_lock);
        return NULL;
 }
 
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+       struct Qdisc *q;
+
+       read_lock(&qdisc_tree_lock);
+       q = __qdisc_lookup(dev, handle);
+       read_unlock(&qdisc_tree_lock);
+       return q;
+}
+
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
        unsigned long cl;
@@ -349,6 +354,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
        return oqdisc;
 }
 
+void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+{
+       struct Qdisc_class_ops *cops;
+       unsigned long cl;
+       u32 parentid;
+
+       if (n == 0)
+               return;
+       while ((parentid = sch->parent)) {
+               sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+               cops = sch->ops->cl_ops;
+               if (cops->qlen_notify) {
+                       cl = cops->get(sch, parentid);
+                       cops->qlen_notify(sch, cl);
+                       cops->put(sch, cl);
+               }
+               sch->q.qlen -= n;
+       }
+}
+EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
    to device "dev".
@@ -399,84 +424,91 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 {
        int err;
        struct rtattr *kind = tca[TCA_KIND-1];
-       void *p = NULL;
        struct Qdisc *sch;
        struct Qdisc_ops *ops;
-       int size;
 
        ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
-       if (ops==NULL && tca[TCA_KIND-1] != NULL) {
+       if (ops == NULL && kind != NULL) {
                char name[IFNAMSIZ];
                if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+                       /* We dropped the RTNL semaphore in order to
+                        * perform the module load.  So, even if we
+                        * succeeded in loading the module we have to
+                        * tell the caller to replay the request.  We
+                        * indicate this using -EAGAIN.
+                        * We replay the request because the device may
+                        * go away in the mean time.
+                        */
+                       rtnl_unlock();
                        request_module("sch_%s", name);
+                       rtnl_lock();
                        ops = qdisc_lookup_ops(kind);
+                       if (ops != NULL) {
+                               /* We will try again qdisc_lookup_ops,
+                                * so don't keep a reference.
+                                */
+                               module_put(ops->owner);
+                               err = -EAGAIN;
+                               goto err_out;
+                       }
                }
        }
 #endif
 
-       err = -EINVAL;
+       err = -ENOENT;
        if (ops == NULL)
                goto err_out;
 
-       /* ensure that the Qdisc and the private data are 32-byte aligned */
-       size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
-       size += ops->priv_size + QDISC_ALIGN_CONST;
-
-       p = kmalloc(size, GFP_KERNEL);
-       err = -ENOBUFS;
-       if (!p)
+       sch = qdisc_alloc(dev, ops);
+       if (IS_ERR(sch)) {
+               err = PTR_ERR(sch);
                goto err_out2;
-       memset(p, 0, size);
-       sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
-                              & ~QDISC_ALIGN_CONST);
-       sch->padded = (char *)sch - (char *)p;
-
-       INIT_LIST_HEAD(&sch->list);
-       skb_queue_head_init(&sch->q);
+       }
 
-       if (handle == TC_H_INGRESS)
+       if (handle == TC_H_INGRESS) {
                sch->flags |= TCQ_F_INGRESS;
-
-       sch->ops = ops;
-       sch->enqueue = ops->enqueue;
-       sch->dequeue = ops->dequeue;
-       sch->dev = dev;
-       dev_hold(dev);
-       atomic_set(&sch->refcnt, 1);
-       sch->stats_lock = &dev->queue_lock;
-       if (handle == 0) {
+               handle = TC_H_MAKE(TC_H_INGRESS, 0);
+       } else if (handle == 0) {
                handle = qdisc_alloc_handle(dev);
                err = -ENOMEM;
                if (handle == 0)
                        goto err_out3;
        }
 
-       if (handle == TC_H_INGRESS)
-                sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
-        else
-                sch->handle = handle;
+       sch->handle = handle;
 
        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+#ifdef CONFIG_NET_ESTIMATOR
+               if (tca[TCA_RATE-1]) {
+                       err = gen_new_estimator(&sch->bstats, &sch->rate_est,
+                                               sch->stats_lock,
+                                               tca[TCA_RATE-1]);
+                       if (err) {
+                               /*
+                                * Any broken qdiscs that would require
+                                * a ops->reset() here? The qdisc was never
+                                * in action so it shouldn't be necessary.
+                                */
+                               if (ops->destroy)
+                                       ops->destroy(sch);
+                               goto err_out3;
+                       }
+               }
+#endif
                qdisc_lock_tree(dev);
                list_add_tail(&sch->list, &dev->qdisc_list);
                qdisc_unlock_tree(dev);
 
-#ifdef CONFIG_NET_ESTIMATOR
-               if (tca[TCA_RATE-1])
-                       gen_new_estimator(&sch->bstats, &sch->rate_est,
-                               sch->stats_lock, tca[TCA_RATE-1]);
-#endif
                return sch;
        }
 err_out3:
        dev_put(dev);
+       kfree((char *) sch - sch->padded);
 err_out2:
        module_put(ops->owner);
 err_out:
        *errp = err;
-       if (p)
-               kfree(p);
        return NULL;
 }
 
@@ -606,14 +638,20 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
-       struct tcmsg *tcm = NLMSG_DATA(n);
-       struct rtattr **tca = arg;
+       struct tcmsg *tcm;
+       struct rtattr **tca;
        struct net_device *dev;
-       u32 clid = tcm->tcm_parent;
-       struct Qdisc *q = NULL;
-       struct Qdisc *p = NULL;
+       u32 clid;
+       struct Qdisc *q, *p;
        int err;
 
+replay:
+       /* Reinit, just in case something touches this. */
+       tcm = NLMSG_DATA(n);
+       tca = arg;
+       clid = tcm->tcm_parent;
+       q = p = NULL;
+
        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
                return -ENODEV;
 
@@ -707,8 +745,11 @@ create_n_graft:
                q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
         else
                q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
-       if (q == NULL)
+       if (q == NULL) {
+               if (err == -EAGAIN)
+                       goto replay;
                return err;
+       }
 
 graft:
        if (1) {
@@ -733,17 +774,18 @@ graft:
 }
 
 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
-                        u32 pid, u32 seq, unsigned flags, int event)
+                        u32 pid, u32 seq, u16 flags, int event)
 {
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;
        struct gnet_dump d;
 
-       nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
-       nlh->nlmsg_flags = flags;
+       nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm__pad1 = 0;
+       tcm->tcm__pad2 = 0;
        tcm->tcm_ifindex = q->dev->ifindex;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
@@ -799,7 +841,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
        }
 
        if (skb->len)
-               return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+               return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 
 err_out:
        kfree_skb(skb);
@@ -821,7 +863,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                        continue;
                if (idx > s_idx)
                        s_q_idx = 0;
-               read_lock_bh(&qdisc_tree_lock);
+               read_lock(&qdisc_tree_lock);
                q_idx = 0;
                list_for_each_entry(q, &dev->qdisc_list, list) {
                        if (q_idx < s_q_idx) {
@@ -830,12 +872,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                        }
                        if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
-                               read_unlock_bh(&qdisc_tree_lock);
+                               read_unlock(&qdisc_tree_lock);
                                goto done;
                        }
                        q_idx++;
                }
-               read_unlock_bh(&qdisc_tree_lock);
+               read_unlock(&qdisc_tree_lock);
        }
 
 done:
@@ -970,7 +1012,7 @@ out:
 
 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
                          unsigned long cl,
-                         u32 pid, u32 seq, unsigned flags, int event)
+                         u32 pid, u32 seq, u16 flags, int event)
 {
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
@@ -978,8 +1020,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
        struct gnet_dump d;
        struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 
-       nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
-       nlh->nlmsg_flags = flags;
+       nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = q->dev->ifindex;
@@ -1024,7 +1065,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                return -EINVAL;
        }
 
-       return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 }
 
 struct qdisc_dump_args
@@ -1059,7 +1100,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        s_t = cb->args[0];
        t = 0;
 
-       read_lock_bh(&qdisc_tree_lock);
+       read_lock(&qdisc_tree_lock);
        list_for_each_entry(q, &dev->qdisc_list, list) {
                if (t < s_t || !q->ops->cl_ops ||
                    (tcm->tcm_parent &&
@@ -1081,7 +1122,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
                        break;
                t++;
        }
-       read_unlock_bh(&qdisc_tree_lock);
+       read_unlock(&qdisc_tree_lock);
 
        cb->args[0] = t;
 
@@ -1097,7 +1138,7 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
        struct tcf_result *res)
 {
        int err = 0;
-       u32 protocol = skb->protocol;
+       __be16 protocol = skb->protocol;
 #ifdef CONFIG_NET_CLS_ACT
        struct tcf_proto *otp = tp;
 reclassify:
@@ -1178,7 +1219,7 @@ EXPORT_SYMBOL(psched_time_base);
  * with 32-bit get_cycles(). Safe up to 4GHz CPU.
  */
 static void psched_tick(unsigned long);
-static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);
+static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
 
 static void psched_tick(unsigned long dummy)
 {