#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
The idea is the following:
- enqueue, dequeue are serialized via top level device
spinlock dev->queue_lock.
- - tree walking is protected by read_lock(qdisc_tree_lock)
+ - tree walking is protected by read_lock_bh(qdisc_tree_lock)
and this lock is used only in process context.
- - updates to tree are made only under rtnl semaphore,
- hence this lock may be made without local bh disabling.
+ - updates to tree are made under rtnl semaphore or
+ from softirq context (__qdisc_destroy rcu-callback)
+ hence this lock needs local bh disabling.
qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
*/
rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
+void qdisc_lock_tree(struct net_device *dev)
+{
+ write_lock_bh(&qdisc_tree_lock);
+ spin_lock_bh(&dev->queue_lock);
+}
+
+void qdisc_unlock_tree(struct net_device *dev)
+{
+ spin_unlock_bh(&dev->queue_lock);
+ write_unlock_bh(&qdisc_tree_lock);
+}
+
/*
dev->queue_lock serializes queue accesses for this device
AND dev->qdisc pointer itself.
/* Dequeue packet */
if ((skb = q->dequeue(q)) != NULL) {
- if (spin_trylock(&dev->xmit_lock)) {
+ unsigned nolock = (dev->features & NETIF_F_LLTX);
+ /*
+ * When the driver has LLTX set it does its own locking
+ * in start_xmit. No need to add additional overhead by
+ * locking again. These checks are worth it because
+ * even uncongested locks can be quite expensive.
+ * The driver can do a trylock, like here, too; in case
+ * of lock contention it should return NETDEV_TX_LOCKED
+ * and the packet will be requeued.
+ */
+ if (!nolock) {
+ if (!spin_trylock(&dev->xmit_lock)) {
+ collision:
+ /* So, someone grabbed the driver. */
+
+ /* It may be a transient configuration error,
+ when hard_start_xmit() recurses. We detect
+ it by checking xmit owner and drop the
+ packet when deadloop is detected.
+ */
+ if (dev->xmit_lock_owner == smp_processor_id()) {
+ kfree_skb(skb);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+ return -1;
+ }
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+ goto requeue;
+ }
/* Remember that the driver is grabbed by us. */
dev->xmit_lock_owner = smp_processor_id();
-
+ }
+
+ {
/* And release queue */
spin_unlock(&dev->queue_lock);
if (!netif_queue_stopped(dev)) {
+ int ret;
if (netdev_nit)
dev_queue_xmit_nit(skb, dev);
- if (dev->hard_start_xmit(skb, dev) == 0) {
- dev->xmit_lock_owner = -1;
- spin_unlock(&dev->xmit_lock);
-
+ ret = dev->hard_start_xmit(skb, dev);
+ if (ret == NETDEV_TX_OK) {
+ if (!nolock) {
+ dev->xmit_lock_owner = -1;
+ spin_unlock(&dev->xmit_lock);
+ }
spin_lock(&dev->queue_lock);
return -1;
}
+ if (ret == NETDEV_TX_LOCKED && nolock) {
+ spin_lock(&dev->queue_lock);
+ goto collision;
+ }
}
+ /* NETDEV_TX_BUSY - we need to requeue */
/* Release the driver */
- dev->xmit_lock_owner = -1;
- spin_unlock(&dev->xmit_lock);
+ if (!nolock) {
+ dev->xmit_lock_owner = -1;
+ spin_unlock(&dev->xmit_lock);
+ }
spin_lock(&dev->queue_lock);
q = dev->qdisc;
- } else {
- /* So, someone grabbed the driver. */
-
- /* It may be transient configuration error,
- when hard_start_xmit() recurses. We detect
- it by checking xmit owner and drop the
- packet when deadloop is detected.
- */
- if (dev->xmit_lock_owner == smp_processor_id()) {
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
- return -1;
- }
- __get_cpu_var(netdev_rx_stat).cpu_collision++;
}
/* Device kicked us out :(
3. device is buggy (ppp)
*/
+requeue:
q->ops->requeue(skb, q);
netif_schedule(dev);
return 1;
static int
pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
- struct sk_buff_head *list;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
- list = ((struct sk_buff_head*)qdisc->data) +
- prio2band[skb->priority&TC_PRIO_MAX];
+ list += prio2band[skb->priority&TC_PRIO_MAX];
if (list->qlen < qdisc->dev->tx_queue_len) {
__skb_queue_tail(list, skb);
pfifo_fast_dequeue(struct Qdisc* qdisc)
{
int prio;
- struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+ struct sk_buff_head *list = qdisc_priv(qdisc);
struct sk_buff *skb;
for (prio = 0; prio < 3; prio++, list++) {
static int
pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
- struct sk_buff_head *list;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
- list = ((struct sk_buff_head*)qdisc->data) +
- prio2band[skb->priority&TC_PRIO_MAX];
+ list += prio2band[skb->priority&TC_PRIO_MAX];
__skb_queue_head(list, skb);
qdisc->q.qlen++;
pfifo_fast_reset(struct Qdisc* qdisc)
{
int prio;
- struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+ struct sk_buff_head *list = qdisc_priv(qdisc);
for (prio=0; prio < 3; prio++)
skb_queue_purge(list+prio);
static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
{
int i;
- struct sk_buff_head *list;
-
- list = ((struct sk_buff_head*)qdisc->data);
+ struct sk_buff_head *list = qdisc_priv(qdisc);
for (i=0; i<3; i++)
skb_queue_head_init(list+i);
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
{
+ void *p;
struct Qdisc *sch;
- int size = sizeof(*sch) + ops->priv_size;
+ int size;
+
+ /* ensure that the Qdisc and the private data are 32-byte aligned */
+ size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+ size += ops->priv_size + QDISC_ALIGN_CONST;
- sch = kmalloc(size, GFP_KERNEL);
- if (!sch)
+ p = kmalloc(size, GFP_KERNEL);
+ if (!p)
return NULL;
- memset(sch, 0, size);
+ memset(p, 0, size);
+ sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
+ & ~QDISC_ALIGN_CONST);
+ sch->padded = (char *)sch - (char *)p;
+
+ INIT_LIST_HEAD(&sch->list);
skb_queue_head_init(&sch->q);
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev = dev;
- sch->stats.lock = &dev->queue_lock;
+ dev_hold(dev);
+ sch->stats_lock = &dev->queue_lock;
atomic_set(&sch->refcnt, 1);
+ /* enqueue is accessed locklessly - make sure it's visible
+ * before we set a netdevice's qdisc pointer to sch */
+ smp_wmb();
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
- kfree(sch);
+ dev_put(dev);
+ kfree(p);
return NULL;
}
ops->reset(qdisc);
}
-/* Under dev->queue_lock and BH! */
+/* This is the RCU callback function that cleans up a qdisc once there
+ * are no further references to it. */
-void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct rcu_head *head)
{
- struct Qdisc_ops *ops = qdisc->ops;
- struct net_device *dev;
+ struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+ struct Qdisc_ops *ops = qdisc->ops;
- if (!atomic_dec_and_test(&qdisc->refcnt))
- return;
-
- dev = qdisc->dev;
-
- if (dev) {
- struct Qdisc *q, **qp;
- for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
- if (q == qdisc) {
- *qp = q->next;
- break;
- }
- }
- }
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&qdisc->stats);
#endif
+ write_lock(&qdisc_tree_lock);
if (ops->reset)
ops->reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
+ write_unlock(&qdisc_tree_lock);
module_put(ops->owner);
+
+ dev_put(qdisc->dev);
if (!(qdisc->flags&TCQ_F_BUILTIN))
- kfree(qdisc);
+ kfree((char *) qdisc - qdisc->padded);
}
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+ if (!atomic_dec_and_test(&qdisc->refcnt))
+ return;
+ list_del(&qdisc->list);
+ call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+}
void dev_activate(struct net_device *dev)
{
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
-
- write_lock(&qdisc_tree_lock);
- qdisc->next = dev->qdisc_list;
- dev->qdisc_list = qdisc;
- write_unlock(&qdisc_tree_lock);
-
+ write_lock_bh(&qdisc_tree_lock);
+ list_add_tail(&qdisc->list, &dev->qdisc_list);
+ write_unlock_bh(&qdisc_tree_lock);
} else {
qdisc = &noqueue_qdisc;
}
- write_lock(&qdisc_tree_lock);
+ write_lock_bh(&qdisc_tree_lock);
dev->qdisc_sleeping = qdisc;
- write_unlock(&qdisc_tree_lock);
+ write_unlock_bh(&qdisc_tree_lock);
}
spin_lock_bh(&dev->queue_lock);
void dev_init_scheduler(struct net_device *dev)
{
- write_lock(&qdisc_tree_lock);
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
dev->qdisc = &noop_qdisc;
- spin_unlock_bh(&dev->queue_lock);
dev->qdisc_sleeping = &noop_qdisc;
- dev->qdisc_list = NULL;
- write_unlock(&qdisc_tree_lock);
+ INIT_LIST_HEAD(&dev->qdisc_list);
+ qdisc_unlock_tree(dev);
dev_watchdog_init(dev);
}
{
struct Qdisc *qdisc;
- write_lock(&qdisc_tree_lock);
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc = dev->qdisc_sleeping;
dev->qdisc = &noop_qdisc;
dev->qdisc_sleeping = &noop_qdisc;
qdisc_destroy(qdisc);
}
#endif
- BUG_TRAP(dev->qdisc_list == NULL);
BUG_TRAP(!timer_pending(&dev->watchdog_timer));
- dev->qdisc_list = NULL;
- spin_unlock_bh(&dev->queue_lock);
- write_unlock(&qdisc_tree_lock);
+ qdisc_unlock_tree(dev);
}
EXPORT_SYMBOL(__netdev_watchdog_up);
EXPORT_SYMBOL(qdisc_destroy);
EXPORT_SYMBOL(qdisc_reset);
EXPORT_SYMBOL(qdisc_restart);
-EXPORT_SYMBOL(qdisc_tree_lock);
+EXPORT_SYMBOL(qdisc_lock_tree);
+EXPORT_SYMBOL(qdisc_unlock_tree);