#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
+#include <linux/bitops.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
-#include <asm/bitops.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new);
*/
/* Protects list of registered TC modules. It is pure SMP lock. */
-static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(qdisc_mod_lock);
/************************************************
{
struct Qdisc *q;
+ read_lock_bh(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
- if (q->handle == handle)
+ if (q->handle == handle) {
+ read_unlock_bh(&qdisc_tree_lock);
return q;
+ }
}
+ read_unlock_bh(&qdisc_tree_lock);
return NULL;
}
-struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
struct Qdisc *leaf;
/* Find queueing discipline by name */
-struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
+static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
struct Qdisc_ops *q = NULL;
if (kind) {
read_lock(&qdisc_mod_lock);
for (q = qdisc_base; q; q = q->next) {
- if (rtattr_strcmp(kind, q->id) == 0)
+ if (rtattr_strcmp(kind, q->id) == 0) {
+ if (!try_module_get(q->owner))
+ q = NULL;
break;
+ }
}
read_unlock(&qdisc_mod_lock);
}
/* Allocate an unique handle from space managed by kernel */
-u32 qdisc_alloc_handle(struct net_device *dev)
+static u32 qdisc_alloc_handle(struct net_device *dev)
{
int i = 0x10000;
static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
dev_deactivate(dev);
qdisc_lock_tree(dev);
- if (qdisc && qdisc->flags&TCQ_F_INGRES) {
+ if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
oqdisc = dev->qdisc_ingress;
/* Prune old scheduler */
if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
Old qdisc is not destroyed but returned in *old.
*/
-int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
- struct Qdisc *new, struct Qdisc **old)
+static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+ u32 classid,
+ struct Qdisc *new, struct Qdisc **old)
{
int err = 0;
struct Qdisc *q = *old;
if (parent == NULL) {
- if (q && q->flags&TCQ_F_INGRES) {
+ if (q && q->flags&TCQ_F_INGRESS) {
*old = dev_graft_qdisc(dev, q);
} else {
*old = dev_graft_qdisc(dev, new);
{
int err;
struct rtattr *kind = tca[TCA_KIND-1];
- void *p = NULL;
struct Qdisc *sch;
struct Qdisc_ops *ops;
- int size;
ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
- if (ops==NULL && tca[TCA_KIND-1] != NULL) {
- if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
- request_module("sch_%s", (char*)RTA_DATA(kind));
+ if (ops == NULL && kind != NULL) {
+ char name[IFNAMSIZ];
+ if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+ /* We dropped the RTNL semaphore in order to
+ * perform the module load. So, even if we
+ * succeeded in loading the module we have to
+ * tell the caller to replay the request. We
+ * indicate this using -EAGAIN.
+ * We replay the request because the device may
+ * go away in the mean time.
+ */
+ rtnl_unlock();
+ request_module("sch_%s", name);
+ rtnl_lock();
ops = qdisc_lookup_ops(kind);
+ if (ops != NULL) {
+ /* We will try again qdisc_lookup_ops,
+ * so don't keep a reference.
+ */
+ module_put(ops->owner);
+ err = -EAGAIN;
+ goto err_out;
+ }
}
}
#endif
err = -EINVAL;
if (ops == NULL)
goto err_out;
- err = -EBUSY;
- if (!try_module_get(ops->owner))
- goto err_out;
- /* ensure that the Qdisc and the private data are 32-byte aligned */
- size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
- size += ops->priv_size + QDISC_ALIGN_CONST;
-
- p = kmalloc(size, GFP_KERNEL);
- err = -ENOBUFS;
- if (!p)
+ sch = qdisc_alloc(dev, ops);
+ if (IS_ERR(sch)) {
+ err = PTR_ERR(sch);
goto err_out2;
- memset(p, 0, size);
- sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
- & ~QDISC_ALIGN_CONST);
- sch->padded = (char *)sch - (char *)p;
-
- INIT_LIST_HEAD(&sch->list);
- skb_queue_head_init(&sch->q);
-
- if (handle == TC_H_INGRESS)
- sch->flags |= TCQ_F_INGRES;
-
- sch->ops = ops;
- sch->enqueue = ops->enqueue;
- sch->dequeue = ops->dequeue;
- sch->dev = dev;
- dev_hold(dev);
- atomic_set(&sch->refcnt, 1);
- sch->stats_lock = &dev->queue_lock;
- if (handle == 0) {
+ }
+
+ if (handle == TC_H_INGRESS) {
+ sch->flags |= TCQ_F_INGRESS;
+ handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ } else if (handle == 0) {
handle = qdisc_alloc_handle(dev);
err = -ENOMEM;
if (handle == 0)
goto err_out3;
}
- if (handle == TC_H_INGRESS)
- sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
- else
- sch->handle = handle;
+ sch->handle = handle;
- /* enqueue is accessed locklessly - make sure it's visible
- * before we set a netdevice's qdisc pointer to sch */
- smp_wmb();
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tca[TCA_RATE-1]) {
+ err = gen_new_estimator(&sch->bstats, &sch->rate_est,
+ sch->stats_lock,
+ tca[TCA_RATE-1]);
+ if (err) {
+ /*
+ * Any broken qdiscs that would require
+ * a ops->reset() here? The qdisc was never
+ * in action so it shouldn't be necessary.
+ */
+ if (ops->destroy)
+ ops->destroy(sch);
+ goto err_out3;
+ }
+ }
+#endif
qdisc_lock_tree(dev);
list_add_tail(&sch->list, &dev->qdisc_list);
qdisc_unlock_tree(dev);
-#ifdef CONFIG_NET_ESTIMATOR
- if (tca[TCA_RATE-1])
- qdisc_new_estimator(&sch->stats, sch->stats_lock,
- tca[TCA_RATE-1]);
-#endif
return sch;
}
err_out3:
dev_put(dev);
+ kfree((char *) sch - sch->padded);
err_out2:
module_put(ops->owner);
err_out:
*errp = err;
- if (p)
- kfree(p);
return NULL;
}
return err;
}
#ifdef CONFIG_NET_ESTIMATOR
- if (tca[TCA_RATE-1]) {
- qdisc_kill_estimator(&sch->stats);
- qdisc_new_estimator(&sch->stats, sch->stats_lock,
- tca[TCA_RATE-1]);
- }
+ if (tca[TCA_RATE-1])
+ gen_replace_estimator(&sch->bstats, &sch->rate_est,
+ sch->stats_lock, tca[TCA_RATE-1]);
#endif
return 0;
}
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
- struct tcmsg *tcm = NLMSG_DATA(n);
- struct rtattr **tca = arg;
+ struct tcmsg *tcm;
+ struct rtattr **tca;
struct net_device *dev;
- u32 clid = tcm->tcm_parent;
- struct Qdisc *q = NULL;
- struct Qdisc *p = NULL;
+ u32 clid;
+ struct Qdisc *q, *p;
int err;
+replay:
+ /* Reinit, just in case something touches this. */
+ tcm = NLMSG_DATA(n);
+ tca = arg;
+ clid = tcm->tcm_parent;
+ q = p = NULL;
+
if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return -ENODEV;
q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
else
q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
- if (q == NULL)
+ if (q == NULL) {
+ if (err == -EAGAIN)
+ goto replay;
return err;
+ }
graft:
if (1) {
return 0;
}
-int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st, spinlock_t *lock)
-{
- spin_lock_bh(lock);
- RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), st);
- spin_unlock_bh(lock);
- return 0;
-
-rtattr_failure:
- spin_unlock_bh(lock);
- return -1;
-}
-
-
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 pid, u32 seq, unsigned flags, int event)
+ u32 pid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
+ struct gnet_dump d;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
tcm->tcm_ifindex = q->dev->ifindex;
tcm->tcm_parent = clid;
tcm->tcm_handle = q->handle;
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto rtattr_failure;
- q->stats.qlen = q->q.qlen;
- if (qdisc_copy_stats(skb, &q->stats, q->stats_lock))
+ q->qstats.qlen = q->q.qlen;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+ TCA_XSTATS, q->stats_lock, &d) < 0)
goto rtattr_failure;
+
+ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+#endif
+ gnet_stats_copy_queue(&d, &q->qstats) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto rtattr_failure;
+
nlh->nlmsg_len = skb->tail - b;
return skb->len;
}
if (skb->len)
- return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
err_out:
kfree_skb(skb);
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
unsigned long cl,
- u32 pid, u32 seq, unsigned flags, int event)
+ u32 pid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
+ struct gnet_dump d;
+ struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
- nlh->nlmsg_flags = flags;
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = q->dev->ifindex;
tcm->tcm_handle = q->handle;
tcm->tcm_info = 0;
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
- if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
+ if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+ TCA_XSTATS, q->stats_lock, &d) < 0)
+ goto rtattr_failure;
+
+ if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
+
nlh->nlmsg_len = skb->tail - b;
return skb->len;
return -EINVAL;
}
- return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
return skb->len;
}
-int psched_us_per_tick = 1;
-int psched_tick_per_us = 1;
+/* Main classifier routine: scans classifier chain attached
+ to this qdisc, (optionally) tests for protocol and asks
+ specific classifiers.
+ */
+int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ int err = 0;
+ u32 protocol = skb->protocol;
+#ifdef CONFIG_NET_CLS_ACT
+ struct tcf_proto *otp = tp;
+reclassify:
+#endif
+ protocol = skb->protocol;
+
+ for ( ; tp; tp = tp->next) {
+ if ((tp->protocol == protocol ||
+ tp->protocol == __constant_htons(ETH_P_ALL)) &&
+ (err = tp->classify(skb, tp, res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ if ( TC_ACT_RECLASSIFY == err) {
+ __u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
+ tp = otp;
+
+ if (MAX_REC_LOOP < verd++) {
+ printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
+ tp->prio&0xffff, ntohs(tp->protocol));
+ return TC_ACT_SHOT;
+ }
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
+ goto reclassify;
+ } else {
+ if (skb->tc_verd)
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+ return err;
+ }
+#else
+
+ return err;
+#endif
+ }
+
+ }
+ return -1;
+}
+
+static int psched_us_per_tick = 1;
+static int psched_tick_per_us = 1;
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
* with 32-bit get_cycles(). Safe up to 4GHz CPU.
*/
static void psched_tick(unsigned long);
-static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);
+static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
static void psched_tick(unsigned long dummy)
{
subsys_initcall(pktsched_init);
-EXPORT_SYMBOL(qdisc_copy_stats);
+EXPORT_SYMBOL(qdisc_lookup);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
+EXPORT_SYMBOL(tc_classify);