vserver 2.0 rc7
[linux-2.6.git] / net / core / netpoll.c
index 0860e08..a119696 100644 (file)
@@ -18,7 +18,9 @@
 #include <linux/interrupt.h>
 #include <linux/netpoll.h>
 #include <linux/sched.h>
+#include <linux/delay.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <asm/unaligned.h>
  * message gets out even in extreme OOM situations.
  */
 
-#define MAX_SKBS 32
 #define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
 static DEFINE_SPINLOCK(skb_list_lock);
 static int nr_skbs;
 static struct sk_buff *skbs;
 
-static DEFINE_SPINLOCK(rx_list_lock);
-static LIST_HEAD(rx_list);
+static DEFINE_SPINLOCK(queue_lock);
+static int queue_depth;
+static struct sk_buff *queue_head, *queue_tail;
 
 static atomic_t trapped;
-static DEFINE_SPINLOCK(netpoll_poll_lock);
 
 #define NETPOLL_RX_ENABLED  1
 #define NETPOLL_RX_DROP     2
@@ -50,6 +53,50 @@ static DEFINE_SPINLOCK(netpoll_poll_lock);
 
 static void zap_completion_queue(void);
 
+/*
+ * Work handler that drains the deferred-transmit queue in process
+ * context, where dev_queue_xmit() may take its usual locks.
+ *
+ * The head pointer is only tested and advanced under queue_lock, so a
+ * concurrent netpoll_queue() on another CPU cannot race the unlocked
+ * emptiness check the previous version performed.
+ */
+static void queue_process(void *p)
+{
+       unsigned long flags;
+       struct sk_buff *skb;
+
+       for (;;) {
+               spin_lock_irqsave(&queue_lock, flags);
+
+               skb = queue_head;
+               if (!skb) {
+                       /* queue drained */
+                       spin_unlock_irqrestore(&queue_lock, flags);
+                       break;
+               }
+
+               queue_head = skb->next;
+               if (skb == queue_tail)
+                       queue_head = NULL;
+
+               queue_depth--;
+
+               spin_unlock_irqrestore(&queue_lock, flags);
+
+               /* Transmit outside the lock so queue_lock is never held
+                * across the device's own locking in dev_queue_xmit(). */
+               dev_queue_xmit(skb);
+       }
+}
+
+static DECLARE_WORK(send_queue, queue_process, NULL);
+
+/*
+ * Defer transmission of @skb to process context via the send_queue
+ * work item; the packet is dropped if the queue is already full.
+ *
+ * The depth limit is checked under queue_lock: checking it beforehand
+ * (as the previous version did) lets two CPUs pass the test
+ * simultaneously and push the queue past MAX_QUEUE_DEPTH.
+ */
+void netpoll_queue(struct sk_buff *skb)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&queue_lock, flags);
+       if (queue_depth == MAX_QUEUE_DEPTH) {
+               spin_unlock_irqrestore(&queue_lock, flags);
+               __kfree_skb(skb);
+               return;
+       }
+
+       skb->next = NULL;       /* terminate the list defensively */
+       if (!queue_head)
+               queue_head = skb;
+       else
+               queue_tail->next = skb;
+       queue_tail = skb;
+       queue_depth++;
+       spin_unlock_irqrestore(&queue_lock, flags);
+
+       schedule_work(&send_queue);
+}
+
 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
                             unsigned short ulen, u32 saddr, u32 daddr)
 {
@@ -66,8 +113,15 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 }
 
 /*
- * Check whether delayed processing was scheduled for our current CPU,
- * and then manually invoke NAPI polling to pump data off the card.
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
  *
  * In cases where there is bi-directional communications, reading only
  * one message at a time can lead to packets being dropped by the
@@ -77,22 +131,19 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 static void poll_napi(struct netpoll *np)
 {
        int budget = 16;
-       unsigned long flags;
-       struct softnet_data *queue;
 
-       spin_lock_irqsave(&netpoll_poll_lock, flags);
-       queue = &__get_cpu_var(softnet_data);
        if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
-           !list_empty(&queue->poll_list)) {
-               np->dev->netpoll_rx |= NETPOLL_RX_DROP;
+           np->poll_owner != smp_processor_id() &&
+           spin_trylock(&np->poll_lock)) {
+               np->rx_flags |= NETPOLL_RX_DROP;
                atomic_inc(&trapped);
 
                np->dev->poll(np->dev, &budget);
 
                atomic_dec(&trapped);
-               np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
+               np->rx_flags &= ~NETPOLL_RX_DROP;
+               spin_unlock(&np->poll_lock);
        }
-       spin_unlock_irqrestore(&netpoll_poll_lock, flags);
 }
 
 void netpoll_poll(struct netpoll *np)
@@ -142,7 +193,10 @@ static void zap_completion_queue(void)
                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
-                       __kfree_skb(skb);
+                       if(skb->destructor)
+                               dev_kfree_skb_any(skb); /* put this one back */
+                       else
+                               __kfree_skb(skb);
                }
        }
 
@@ -165,10 +219,11 @@ repeat:
        if (!skb) {
                spin_lock_irqsave(&skb_list_lock, flags);
                skb = skbs;
-               if (skb)
+               if (skb) {
                        skbs = skb->next;
-               skb->next = NULL;
-               nr_skbs--;
+                       skb->next = NULL;
+                       nr_skbs--;
+               }
                spin_unlock_irqrestore(&skb_list_lock, flags);
        }
 
@@ -197,6 +252,16 @@ repeat:
                return;
        }
 
+       /* avoid recursion */
+       if(np->poll_owner == smp_processor_id() ||
+          np->dev->xmit_lock_owner == smp_processor_id()) {
+               if (np->drop)
+                       np->drop(skb);
+               else
+                       __kfree_skb(skb);
+               return;
+       }
+
        spin_lock(&np->dev->xmit_lock);
        np->dev->xmit_lock_owner = smp_processor_id();
 
@@ -233,7 +298,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
        udp_len = len + sizeof(*udph);
        ip_len = eth_len = udp_len + sizeof(*iph);
-       total_len = eth_len + ETH_HLEN;
+       total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
 
        skb = find_skb(np, total_len, total_len - len);
        if (!skb)
@@ -269,6 +334,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
        memcpy(eth->h_source, np->local_mac, 6);
        memcpy(eth->h_dest, np->remote_mac, 6);
 
+       skb->dev = np->dev;
+
        netpoll_send_skb(np, skb);
 }
 
@@ -279,18 +346,7 @@ static void arp_reply(struct sk_buff *skb)
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        u32 sip, tip;
        struct sk_buff *send_skb;
-       unsigned long flags;
-       struct list_head *p;
-       struct netpoll *np = NULL;
-
-       spin_lock_irqsave(&rx_list_lock, flags);
-       list_for_each(p, &rx_list) {
-               np = list_entry(p, struct netpoll, rx_list);
-               if ( np->dev == skb->dev )
-                       break;
-               np = NULL;
-       }
-       spin_unlock_irqrestore(&rx_list_lock, flags);
+       struct netpoll *np = skb->dev->np;
 
        if (!np) return;
 
@@ -368,15 +424,15 @@ static void arp_reply(struct sk_buff *skb)
        netpoll_send_skb(np, send_skb);
 }
 
-int netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb)
 {
        int proto, len, ulen;
        struct iphdr *iph;
        struct udphdr *uh;
-       struct netpoll *np;
-       struct list_head *p;
-       unsigned long flags;
+       struct netpoll *np = skb->dev->np;
 
+       if (!np->rx_hook)
+               goto out;
        if (skb->dev->type != ARPHRD_ETHER)
                goto out;
 
@@ -420,32 +476,27 @@ int netpoll_rx(struct sk_buff *skb)
                goto out;
        if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
                goto out;
+       if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+               goto out;
+       if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+               goto out;
+       if (np->local_port && np->local_port != ntohs(uh->dest))
+               goto out;
+
+       np->rx_hook(np, ntohs(uh->source),
+                   (char *)(uh+1),
+                   ulen - sizeof(struct udphdr));
 
-       spin_lock_irqsave(&rx_list_lock, flags);
-       list_for_each(p, &rx_list) {
-               np = list_entry(p, struct netpoll, rx_list);
-               if (np->dev && np->dev != skb->dev)
-                       continue;
-               if (np->local_ip && np->local_ip != ntohl(iph->daddr))
-                       continue;
-               if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
-                       continue;
-               if (np->local_port && np->local_port != ntohs(uh->dest))
-                       continue;
-
-               spin_unlock_irqrestore(&rx_list_lock, flags);
-
-               if (np->rx_hook)
-                       np->rx_hook(np, ntohs(uh->source),
-                                   (char *)(uh+1),
-                                   ulen - sizeof(struct udphdr));
+       kfree_skb(skb);
+       return 1;
 
+out:
+       if (atomic_read(&trapped)) {
+               kfree_skb(skb);
                return 1;
        }
-       spin_unlock_irqrestore(&rx_list_lock, flags);
 
-out:
-       return atomic_read(&trapped);
+       return 0;
 }
 
 int netpoll_parse_options(struct netpoll *np, char *opt)
@@ -561,6 +612,9 @@ int netpoll_setup(struct netpoll *np)
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
 
+       np->poll_lock = SPIN_LOCK_UNLOCKED;
+       np->poll_owner = -1;
+
        if (np->dev_name)
                ndev = dev_get_by_name(np->dev_name);
        if (!ndev) {
@@ -568,6 +622,10 @@ int netpoll_setup(struct netpoll *np)
                       np->name, np->dev_name);
                return -1;
        }
+
+       np->dev = ndev;
+       ndev->np = np;
+
        if (!ndev->poll_controller) {
                printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
                       np->name, np->dev_name);
@@ -575,16 +633,13 @@ int netpoll_setup(struct netpoll *np)
        }
 
        if (!netif_running(ndev)) {
-               unsigned short oflags;
                unsigned long atmost, atleast;
 
                printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
                       np->name, np->dev_name);
 
-               oflags = ndev->flags;
-
                rtnl_shlock();
-               if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
+               if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
                        printk(KERN_ERR "%s: failed to open %s\n",
                               np->name, np->dev_name);
                        rtnl_shunlock();
@@ -593,7 +648,7 @@ int netpoll_setup(struct netpoll *np)
                rtnl_shunlock();
 
                atleast = jiffies + HZ/10;
-               atmost = jiffies + 10*HZ;
+               atmost = jiffies + 4*HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                printk(KERN_NOTICE
@@ -604,12 +659,16 @@ int netpoll_setup(struct netpoll *np)
                        cond_resched();
                }
 
+               /* If carrier appears to come up instantly, we don't
+                * trust it and pause so that we don't pump all our
+                * queued console messages into the bitbucket.
+                */
+
                if (time_before(jiffies, atleast)) {
-                       printk(KERN_NOTICE "%s: carrier detect appears flaky,"
-                              " waiting 10 seconds\n",
+                       printk(KERN_NOTICE "%s: carrier detect appears"
+                              " untrustworthy, waiting 4 seconds\n",
                               np->name);
-                       while (time_before(jiffies, atmost))
-                               cond_resched();
+                       msleep(4000);
                }
        }
 
@@ -633,36 +692,22 @@ int netpoll_setup(struct netpoll *np)
                       np->name, HIPQUAD(np->local_ip));
        }
 
-       np->dev = ndev;
-
-       if(np->rx_hook) {
-               unsigned long flags;
-
-               np->dev->netpoll_rx = NETPOLL_RX_ENABLED;
-
-               spin_lock_irqsave(&rx_list_lock, flags);
-               list_add(&np->rx_list, &rx_list);
-               spin_unlock_irqrestore(&rx_list_lock, flags);
-       }
+       if(np->rx_hook)
+               np->rx_flags = NETPOLL_RX_ENABLED;
 
        return 0;
+
  release:
+       ndev->np = NULL;
+       np->dev = NULL;
        dev_put(ndev);
        return -1;
 }
 
 void netpoll_cleanup(struct netpoll *np)
 {
-       if (np->rx_hook) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&rx_list_lock, flags);
-               list_del(&np->rx_list);
-               spin_unlock_irqrestore(&rx_list_lock, flags);
-       }
-
        if (np->dev)
-               np->dev->netpoll_rx = 0;
+               np->dev->np = NULL;
        dev_put(np->dev);
        np->dev = NULL;
 }
@@ -687,3 +732,4 @@ EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
 EXPORT_SYMBOL(netpoll_poll);
+EXPORT_SYMBOL(netpoll_queue);