X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fsched%2Fsch_generic.c;h=2516dd92a0f13714cb1c5cd9ff7c797528a8301d;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=97b56255c7be36e5005419b814428a3289d522db;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 97b56255c..2516dd92a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,8 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -44,15 +46,28 @@
    The idea is the following:
    - enqueue, dequeue are serialized via top level device
      spinlock dev->queue_lock.
-   - tree walking is protected by read_lock(qdisc_tree_lock)
+   - tree walking is protected by read_lock_bh(qdisc_tree_lock)
      and this lock is used only in process context.
-   - updates to tree are made only under rtnl semaphore,
-     hence this lock may be made without local bh disabling.
+   - updates to tree are made under rtnl semaphore or
+     from softirq context (__qdisc_destroy rcu-callback)
+     hence this lock needs local bh disabling.
 
    qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
  */
 rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
 
+void qdisc_lock_tree(struct net_device *dev)
+{
+	write_lock_bh(&qdisc_tree_lock);
+	spin_lock_bh(&dev->queue_lock);
+}
+
+void qdisc_unlock_tree(struct net_device *dev)
+{
+	spin_unlock_bh(&dev->queue_lock);
+	write_unlock_bh(&qdisc_tree_lock);
+}
+
 /*
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -82,46 +97,71 @@ int qdisc_restart(struct net_device *dev)
 
 	/* Dequeue packet */
 	if ((skb = q->dequeue(q)) != NULL) {
-		if (spin_trylock(&dev->xmit_lock)) {
+		unsigned nolock = (dev->features & NETIF_F_LLTX);
+		/*
+		 * When the driver has LLTX set it does its own locking
+		 * in start_xmit. No need to add additional overhead by
+		 * locking again. These checks are worth it because
+		 * even uncongested locks can be quite expensive.
+		 * The driver can do trylock like here too, in case
+		 * of lock congestion it should return -1 and the packet
+		 * will be requeued.
+		 */
+		if (!nolock) {
+			if (!spin_trylock(&dev->xmit_lock)) {
+			collision:
+				/* So, someone grabbed the driver. */
+
+				/* It may be transient configuration error,
+				   when hard_start_xmit() recurses. We detect
+				   it by checking xmit owner and drop the
+				   packet when deadloop is detected.
+				 */
+				if (dev->xmit_lock_owner == smp_processor_id()) {
+					kfree_skb(skb);
+					if (net_ratelimit())
+						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					return -1;
+				}
+				__get_cpu_var(netdev_rx_stat).cpu_collision++;
+				goto requeue;
+			}
 			/* Remember that the driver is grabbed by us. */
 			dev->xmit_lock_owner = smp_processor_id();
-
+		}
+
+		{
 			/* And release queue */
 			spin_unlock(&dev->queue_lock);
 
 			if (!netif_queue_stopped(dev)) {
+				int ret;
 				if (netdev_nit)
 					dev_queue_xmit_nit(skb, dev);
 
-				if (dev->hard_start_xmit(skb, dev) == 0) {
-					dev->xmit_lock_owner = -1;
-					spin_unlock(&dev->xmit_lock);
-
+				ret = dev->hard_start_xmit(skb, dev);
+				if (ret == NETDEV_TX_OK) {
+					if (!nolock) {
+						dev->xmit_lock_owner = -1;
+						spin_unlock(&dev->xmit_lock);
+					}
 					spin_lock(&dev->queue_lock);
 					return -1;
 				}
+				if (ret == NETDEV_TX_LOCKED && nolock) {
+					spin_lock(&dev->queue_lock);
+					goto collision;
+				}
 			}
+
+			/* NETDEV_TX_BUSY - we need to requeue */
 			/* Release the driver */
-			dev->xmit_lock_owner = -1;
-			spin_unlock(&dev->xmit_lock);
+			if (!nolock) {
+				dev->xmit_lock_owner = -1;
+				spin_unlock(&dev->xmit_lock);
+			}
 			spin_lock(&dev->queue_lock);
 			q = dev->qdisc;
-		} else {
-			/* So, someone grabbed the driver. */
-
-			/* It may be transient configuration error,
-			   when hard_start_xmit() recurses. We detect
-			   it by checking xmit owner and drop the
-			   packet when deadloop is detected.
-			 */
-			if (dev->xmit_lock_owner == smp_processor_id()) {
-				kfree_skb(skb);
-				if (net_ratelimit())
-					printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-				return -1;
-			}
-			__get_cpu_var(netdev_rx_stat).cpu_collision++;
 		}
 
 		/* Device kicked us out :(
@@ -134,6 +174,7 @@ int qdisc_restart(struct net_device *dev)
 	   3. device is buggy (ppp)
 	 */
 
+requeue:
 	q->ops->requeue(skb, q);
 	netif_schedule(dev);
 	return 1;
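
To make the LLTX contract above concrete, here is a minimal sketch of a hypothetical lockless-TX driver; foo_priv, its tx_lock, and foo_hard_start_xmit are invented names, not part of this patch. The driver takes only its own lock, with trylock, and reports contention; qdisc_restart() above then requeues the skb via the collision path:

struct foo_priv {
	spinlock_t tx_lock;		/* driver-private TX lock */
};

static int foo_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_priv *fp = dev->priv;

	/* LLTX: the core did not take dev->xmit_lock on our behalf. */
	if (!spin_trylock(&fp->tx_lock))
		/* qdisc_restart() maps this to "goto collision" and
		 * requeues the packet instead of dropping it. */
		return NETDEV_TX_LOCKED;

	/* ... hand the skb to the hardware here ... */

	spin_unlock(&fp->tx_lock);
	return NETDEV_TX_OK;
}

Such a driver would advertise this behaviour at probe time with dev->features |= NETIF_F_LLTX, which is what makes nolock non-zero in qdisc_restart() above.
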
@@ -270,10 +311,9 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
 static int
 pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
-	list = ((struct sk_buff_head*)qdisc->data) +
-		prio2band[skb->priority&TC_PRIO_MAX];
+	list += prio2band[skb->priority&TC_PRIO_MAX];
 
 	if (list->qlen < qdisc->dev->tx_queue_len) {
 		__skb_queue_tail(list, skb);
@@ -291,7 +331,7 @@ static struct sk_buff *
 pfifo_fast_dequeue(struct Qdisc* qdisc)
 {
 	int prio;
-	struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 	struct sk_buff *skb;
 
 	for (prio = 0; prio < 3; prio++, list++) {
@@ -307,10 +347,9 @@ pfifo_fast_dequeue(struct Qdisc* qdisc)
 static int
 pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
-	list = ((struct sk_buff_head*)qdisc->data) +
-		prio2band[skb->priority&TC_PRIO_MAX];
+	list += prio2band[skb->priority&TC_PRIO_MAX];
 
 	__skb_queue_head(list, skb);
 	qdisc->q.qlen++;
@@ -321,7 +360,7 @@ static void
 pfifo_fast_reset(struct Qdisc* qdisc)
 {
 	int prio;
-	struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
 	for (prio=0; prio < 3; prio++)
 		skb_queue_purge(list+prio);
@@ -346,9 +385,7 @@ rtattr_failure:
 static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
 {
 	int i;
-	struct sk_buff_head *list;
-
-	list = ((struct sk_buff_head*)qdisc->data);
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
 	for (i=0; i<3; i++)
 		skb_queue_head_init(list+i);
@@ -372,25 +409,40 @@ static struct Qdisc_ops pfifo_fast_ops = {
 
 struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
 {
+	void *p;
 	struct Qdisc *sch;
-	int size = sizeof(*sch) + ops->priv_size;
+	int size;
+
+	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+	size += ops->priv_size + QDISC_ALIGN_CONST;
 
-	sch = kmalloc(size, GFP_KERNEL);
-	if (!sch)
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
 		return NULL;
-	memset(sch, 0, size);
+	memset(p, 0, size);
+	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
+			       & ~QDISC_ALIGN_CONST);
+	sch->padded = (char *)sch - (char *)p;
+
+	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev = dev;
-	sch->stats.lock = &dev->queue_lock;
+	dev_hold(dev);
+	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
+	/* enqueue is accessed locklessly - make sure it's visible
+	 * before we set a netdevice's qdisc pointer to sch */
+	smp_wmb();
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
-	kfree(sch);
+	dev_put(dev);
+	kfree(p);
 	return NULL;
 }
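
The size and pointer rounding in qdisc_create_dflt() is the standard add-then-mask idiom. Below is a small user-space sketch of the arithmetic, assuming QDISC_ALIGN_CONST is 31 (32-byte alignment minus one; the actual constant lives in the pkt_sched headers, not in this patch):

#include <stdio.h>

int main(void)
{
	unsigned long p = 0x1005;		/* pretend kmalloc() result */
	unsigned long c = 31;			/* alignment - 1 */
	unsigned long sch = (p + c) & ~c;	/* first boundary >= p */

	/* prints: 0x1005 -> 0x1020, padded by 27 */
	printf("%#lx -> %#lx, padded by %lu\n", p, sch, sch - p);
	return 0;
}

Because up to QDISC_ALIGN_CONST bytes can be lost to this padding, the allocation reserves ops->priv_size + QDISC_ALIGN_CONST extra bytes, and sch->padded records the shift so the destruction path can kfree() the original pointer.
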
@@ -404,39 +456,39 @@ void qdisc_reset(struct Qdisc *qdisc)
 		ops->reset(qdisc);
 }
 
-/* Under dev->queue_lock and BH! */
+/* this is the rcu callback function to clean up a qdisc when there
+ * are no further references to it */
 
-void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct rcu_head *head)
 {
-	struct Qdisc_ops *ops = qdisc->ops;
-	struct net_device *dev;
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+	struct Qdisc_ops *ops = qdisc->ops;
 
-	if (!atomic_dec_and_test(&qdisc->refcnt))
-		return;
-
-	dev = qdisc->dev;
-
-	if (dev) {
-		struct Qdisc *q, **qp;
-		for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
-			if (q == qdisc) {
-				*qp = q->next;
-				break;
-			}
-		}
-	}
 #ifdef CONFIG_NET_ESTIMATOR
 	qdisc_kill_estimator(&qdisc->stats);
 #endif
+	write_lock(&qdisc_tree_lock);
 	if (ops->reset)
 		ops->reset(qdisc);
 	if (ops->destroy)
 		ops->destroy(qdisc);
+	write_unlock(&qdisc_tree_lock);
 	module_put(ops->owner);
+
+	dev_put(qdisc->dev);
 	if (!(qdisc->flags&TCQ_F_BUILTIN))
-		kfree(qdisc);
+		kfree((char *) qdisc - qdisc->padded);
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	if (!atomic_dec_and_test(&qdisc->refcnt))
+		return;
+	list_del(&qdisc->list);
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
 
 void dev_activate(struct net_device *dev)
 {
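
Stripped of the qdisc specifics, the refcount-plus-RCU shape introduced above is a generic pattern; the foo_* names here are hypothetical:

struct foo {
	atomic_t	refcnt;
	struct rcu_head	rcu;
	/* ... payload ... */
};

static void foo_free_rcu(struct rcu_head *head)
{
	/* Runs in softirq context once a grace period has elapsed,
	 * so no reader can still hold a pointer obtained before
	 * the call_rcu() below. */
	struct foo *f = container_of(head, struct foo, rcu);
	kfree(f);
}

static void foo_put(struct foo *f)
{
	/* Drop a reference; unpublish now, free after a grace period. */
	if (atomic_dec_and_test(&f->refcnt))
		call_rcu(&f->rcu, foo_free_rcu);
}

This deferral is also why the locking comment at the top of the file changed: the teardown work now runs from softirq context, so qdisc_tree_lock users must disable BHs.
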
@@ -454,18 +506,15 @@ void dev_activate(struct net_device *dev)
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
-
-		write_lock(&qdisc_tree_lock);
-		qdisc->next = dev->qdisc_list;
-		dev->qdisc_list = qdisc;
-		write_unlock(&qdisc_tree_lock);
-
+		write_lock_bh(&qdisc_tree_lock);
+		list_add_tail(&qdisc->list, &dev->qdisc_list);
+		write_unlock_bh(&qdisc_tree_lock);
 	} else {
 		qdisc = &noqueue_qdisc;
 	}
-	write_lock(&qdisc_tree_lock);
+	write_lock_bh(&qdisc_tree_lock);
 	dev->qdisc_sleeping = qdisc;
-	write_unlock(&qdisc_tree_lock);
+	write_unlock_bh(&qdisc_tree_lock);
 	}
 
 	spin_lock_bh(&dev->queue_lock);
@@ -498,13 +547,11 @@ void dev_deactivate(struct net_device *dev)
 
 void dev_init_scheduler(struct net_device *dev)
 {
-	write_lock(&qdisc_tree_lock);
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	dev->qdisc = &noop_qdisc;
-	spin_unlock_bh(&dev->queue_lock);
 	dev->qdisc_sleeping = &noop_qdisc;
-	dev->qdisc_list = NULL;
-	write_unlock(&qdisc_tree_lock);
+	INIT_LIST_HEAD(&dev->qdisc_list);
+	qdisc_unlock_tree(dev);
 
 	dev_watchdog_init(dev);
 }
@@ -513,8 +560,7 @@ void dev_shutdown(struct net_device *dev)
 {
 	struct Qdisc *qdisc;
 
-	write_lock(&qdisc_tree_lock);
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	qdisc = dev->qdisc_sleeping;
 	dev->qdisc = &noop_qdisc;
 	dev->qdisc_sleeping = &noop_qdisc;
@@ -525,11 +571,8 @@ void dev_shutdown(struct net_device *dev)
 		qdisc_destroy(qdisc);
 	}
 #endif
-	BUG_TRAP(dev->qdisc_list == NULL);
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
-	dev->qdisc_list = NULL;
-	spin_unlock_bh(&dev->queue_lock);
-	write_unlock(&qdisc_tree_lock);
+	qdisc_unlock_tree(dev);
 }
 
 EXPORT_SYMBOL(__netdev_watchdog_up);
@@ -539,4 +582,5 @@ EXPORT_SYMBOL(qdisc_create_dflt);
 EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);
-EXPORT_SYMBOL(qdisc_tree_lock);
+EXPORT_SYMBOL(qdisc_lock_tree);
+EXPORT_SYMBOL(qdisc_unlock_tree);
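
Finally, a hedged sketch of how a control-path caller would be expected to use the new helpers, modeled on dev_shutdown() above; example_swap_qdisc is not part of the patch:

static void example_swap_qdisc(struct net_device *dev, struct Qdisc *new)
{
	struct Qdisc *old;

	/* Takes qdisc_tree_lock before dev->queue_lock, with BHs off,
	 * honouring the ordering rule documented at the top of the file. */
	qdisc_lock_tree(dev);
	old = dev->qdisc;
	dev->qdisc = new;
	dev->qdisc_sleeping = new;
	if (old != &noop_qdisc)
		qdisc_destroy(old);	/* drops a ref; the actual free is
					 * deferred to __qdisc_destroy() by RCU */
	qdisc_unlock_tree(dev);
}

qdisc_destroy() is thus called with dev->queue_lock held and BHs disabled, as its comment requires, while the expensive teardown happens later in the RCU callback.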