X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=net%2Fsched%2Fsch_generic.c;h=82117f9ba7d1907dba3dcb800f95dc5236ad49c6;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=97b56255c7be36e5005419b814428a3289d522db;hpb=db216c3d5e4c040e557a50f8f5d35d5c415e8c1c;p=linux-2.6.git

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 97b56255c..82117f9ba 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,8 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -44,15 +46,28 @@
    The idea is the following:
    - enqueue, dequeue are serialized via top level device
      spinlock dev->queue_lock.
-   - tree walking is protected by read_lock(qdisc_tree_lock)
+   - tree walking is protected by read_lock_bh(qdisc_tree_lock)
      and this lock is used only in process context.
-   - updates to tree are made only under rtnl semaphore,
-     hence this lock may be made without local bh disabling.
+   - updates to tree are made under rtnl semaphore or
+     from softirq context (__qdisc_destroy rcu-callback)
+     hence this lock needs local bh disabling.
 
    qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
  */
 rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
 
+void qdisc_lock_tree(struct net_device *dev)
+{
+	write_lock_bh(&qdisc_tree_lock);
+	spin_lock_bh(&dev->queue_lock);
+}
+
+void qdisc_unlock_tree(struct net_device *dev)
+{
+	spin_unlock_bh(&dev->queue_lock);
+	write_unlock_bh(&qdisc_tree_lock);
+}
+
 /*
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -270,10 +285,9 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
 static int
 pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
-	list = ((struct sk_buff_head*)qdisc->data) +
-		prio2band[skb->priority&TC_PRIO_MAX];
+	list += prio2band[skb->priority&TC_PRIO_MAX];
 
 	if (list->qlen < qdisc->dev->tx_queue_len) {
 		__skb_queue_tail(list, skb);
@@ -291,7 +305,7 @@ static struct sk_buff *
 pfifo_fast_dequeue(struct Qdisc* qdisc)
 {
 	int prio;
-	struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 	struct sk_buff *skb;
 
 	for (prio = 0; prio < 3; prio++, list++) {
@@ -307,10 +321,9 @@ pfifo_fast_dequeue(struct Qdisc* qdisc)
 static int
 pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
-	list = ((struct sk_buff_head*)qdisc->data) +
-		prio2band[skb->priority&TC_PRIO_MAX];
+	list += prio2band[skb->priority&TC_PRIO_MAX];
 
 	__skb_queue_head(list, skb);
 	qdisc->q.qlen++;
@@ -321,7 +334,7 @@ static void
 pfifo_fast_reset(struct Qdisc* qdisc)
 {
 	int prio;
-	struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
 	for (prio=0; prio < 3; prio++)
 		skb_queue_purge(list+prio);
@@ -346,9 +359,7 @@ rtattr_failure:
 static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
 {
 	int i;
-	struct sk_buff_head *list;
-
-	list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
 	for (i=0; i<3; i++)
 		skb_queue_head_init(list+i);
@@ -372,25 +383,39 @@ static struct Qdisc_ops pfifo_fast_ops = {
 
 struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
 {
+	void *p;
 	struct Qdisc *sch;
-	int size = sizeof(*sch) + ops->priv_size;
+	int size;
 
-	sch = kmalloc(size, GFP_KERNEL);
-	if (!sch)
+	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+	size += ops->priv_size + QDISC_ALIGN_CONST;
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
 		return NULL;
-	memset(sch, 0, size);
+	memset(p, 0, size);
+
+	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
+			       & ~QDISC_ALIGN_CONST);
+	sch->padded = (char *)sch - (char *)p;
 
+	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev = dev;
-	sch->stats.lock = &dev->queue_lock;
+	dev_hold(dev);
+	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
+	/* enqueue is accessed locklessly - make sure it's visible
+	 * before we set a netdevice's qdisc pointer to sch */
+	smp_wmb();
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
-	kfree(sch);
+	kfree(p);
 	return NULL;
 }
 
@@ -404,39 +429,39 @@ void qdisc_reset(struct Qdisc *qdisc)
 		ops->reset(qdisc);
 }
 
-/* Under dev->queue_lock and BH! */
+/* this is the rcu callback function to clean up a qdisc when there
+ * are no further references to it */
 
-void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct rcu_head *head)
 {
-	struct Qdisc_ops *ops = qdisc->ops;
-	struct net_device *dev;
-
-	if (!atomic_dec_and_test(&qdisc->refcnt))
-		return;
-
-	dev = qdisc->dev;
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+	struct Qdisc_ops *ops = qdisc->ops;
 
-	if (dev) {
-		struct Qdisc *q, **qp;
-		for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
-			if (q == qdisc) {
-				*qp = q->next;
-				break;
-			}
-		}
-	}
 #ifdef CONFIG_NET_ESTIMATOR
 	qdisc_kill_estimator(&qdisc->stats);
 #endif
+	write_lock(&qdisc_tree_lock);
 	if (ops->reset)
 		ops->reset(qdisc);
 	if (ops->destroy)
 		ops->destroy(qdisc);
+	write_unlock(&qdisc_tree_lock);
 	module_put(ops->owner);
+
+	dev_put(qdisc->dev);
 	if (!(qdisc->flags&TCQ_F_BUILTIN))
-		kfree(qdisc);
+		kfree((char *) qdisc - qdisc->padded);
 }
 
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	if (!atomic_dec_and_test(&qdisc->refcnt))
+		return;
+	list_del(&qdisc->list);
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+}
 
 void dev_activate(struct net_device *dev)
 {
@@ -454,18 +479,15 @@ void dev_activate(struct net_device *dev)
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
 				return;
 			}
-
-			write_lock(&qdisc_tree_lock);
-			qdisc->next = dev->qdisc_list;
-			dev->qdisc_list = qdisc;
-			write_unlock(&qdisc_tree_lock);
-
+			write_lock_bh(&qdisc_tree_lock);
+			list_add_tail(&qdisc->list, &dev->qdisc_list);
+			write_unlock_bh(&qdisc_tree_lock);
 		} else {
 			qdisc = &noqueue_qdisc;
 		}
-		write_lock(&qdisc_tree_lock);
+		write_lock_bh(&qdisc_tree_lock);
 		dev->qdisc_sleeping = qdisc;
-		write_unlock(&qdisc_tree_lock);
+		write_unlock_bh(&qdisc_tree_lock);
 	}
 
 	spin_lock_bh(&dev->queue_lock);
@@ -498,13 +520,11 @@ void dev_deactivate(struct net_device *dev)
 
 void dev_init_scheduler(struct net_device *dev)
 {
-	write_lock(&qdisc_tree_lock);
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	dev->qdisc = &noop_qdisc;
-	spin_unlock_bh(&dev->queue_lock);
 	dev->qdisc_sleeping = &noop_qdisc;
-	dev->qdisc_list = NULL;
-	write_unlock(&qdisc_tree_lock);
+	INIT_LIST_HEAD(&dev->qdisc_list);
+	qdisc_unlock_tree(dev);
 
 	dev_watchdog_init(dev);
 }
@@ -513,8 +533,7 @@ void dev_shutdown(struct net_device *dev)
 {
 	struct Qdisc *qdisc;
 
-	write_lock(&qdisc_tree_lock);
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	qdisc = dev->qdisc_sleeping;
 	dev->qdisc = &noop_qdisc;
 	dev->qdisc_sleeping = &noop_qdisc;
@@ -525,11 +544,8 @@ void dev_shutdown(struct net_device *dev)
 		qdisc_destroy(qdisc);
 	}
 #endif
-	BUG_TRAP(dev->qdisc_list == NULL);
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
-	dev->qdisc_list = NULL;
-	spin_unlock_bh(&dev->queue_lock);
-	write_unlock(&qdisc_tree_lock);
+	qdisc_unlock_tree(dev);
 }
 
 EXPORT_SYMBOL(__netdev_watchdog_up);
@@ -539,4 +555,5 @@ EXPORT_SYMBOL(qdisc_create_dflt);
 EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);
-EXPORT_SYMBOL(qdisc_tree_lock);
+EXPORT_SYMBOL(qdisc_lock_tree);
+EXPORT_SYMBOL(qdisc_unlock_tree);
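
A note on the allocation trick introduced in qdisc_create_dflt above: the Qdisc and its private data are carved out of a single kmalloc'd block, both rounded up to a 32-byte boundary, and the distance back to the raw pointer is recorded in sch->padded so that __qdisc_destroy can later release the original allocation with kfree((char *)qdisc - qdisc->padded). What follows is a minimal userspace sketch of the same align-and-remember-padding pattern; ALIGN_MASK, struct obj and the obj_* helpers are illustrative names invented here, not the kernel's.

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define ALIGN_MASK ((unsigned long)32 - 1)	/* 32-byte boundary, as in the patch */

	struct obj {
		unsigned int padded;	/* offset back to the raw allocation */
		/* ... other fields ... */
	};

	static struct obj *obj_alloc(size_t priv_size)
	{
		size_t size;
		void *p;
		struct obj *o;

		/* round the struct size up to the boundary, then reserve
		 * enough slack to slide the object itself forward too */
		size = (sizeof(*o) + ALIGN_MASK) & ~ALIGN_MASK;
		size += priv_size + ALIGN_MASK;

		p = malloc(size);
		if (!p)
			return NULL;
		memset(p, 0, size);

		/* align the object within the block and remember how far
		 * it sits from the pointer malloc actually returned */
		o = (struct obj *)(((unsigned long)p + ALIGN_MASK) & ~ALIGN_MASK);
		o->padded = (char *)o - (char *)p;
		return o;
	}

	static void *obj_priv(struct obj *o)
	{
		/* private area begins after the rounded-up struct size,
		 * so it lands on a 32-byte boundary as well; this mirrors
		 * what the qdisc_priv() helper used in the patch must do
		 * (its definition lives in a header, not in this diff) */
		return (char *)o + ((sizeof(*o) + ALIGN_MASK) & ~ALIGN_MASK);
	}

	static void obj_free(struct obj *o)
	{
		/* mirror of kfree((char *)qdisc - qdisc->padded) */
		free((char *)o - o->padded);
	}

	int main(void)
	{
		struct obj *o = obj_alloc(64);

		if (!o)
			return 1;
		printf("object at %p, priv at %p, padded by %u bytes\n",
		       (void *)o, obj_priv(o), o->padded);
		obj_free(o);
		return 0;
	}

The recorded padding is also what makes the RCU change above work: qdisc_destroy now only drops the last reference, unlinks the qdisc from dev->qdisc_list and hands it to call_rcu(), and the real teardown and kfree of the padded block happen in the __qdisc_destroy callback, once a grace period guarantees no CPU is still walking the old qdisc locklessly from the transmit path.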