fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / core / netpoll.c
index 4115945..522e441 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/string.h>
+#include <linux/if_arp.h>
 #include <linux/inetdevice.h>
 #include <linux/inet.h>
 #include <linux/interrupt.h>
 #include <linux/netpoll.h>
 #include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <asm/unaligned.h>
 
 /*
  * We maintain a small pool of fully-sized skbs, to make sure the
  * message gets out even in extreme OOM situations.
  */
 
-#define MAX_SKBS 32
 #define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
-static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
-static int nr_skbs;
-static struct sk_buff *skbs;
+static struct sk_buff_head skb_pool;
 
-static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(rx_list);
+static atomic_t trapped;
 
-static int trapped;
+#define USEC_PER_POLL  50
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
 
 #define MAX_SKB_SIZE \
                (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
                                sizeof(struct iphdr) + sizeof(struct ethhdr))
 
 static void zap_completion_queue(void);
+static void arp_reply(struct sk_buff *skb);
+
+static void queue_process(struct work_struct *work)
+{
+       struct netpoll_info *npinfo =
+               container_of(work, struct netpoll_info, tx_work.work);
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       while ((skb = skb_dequeue(&npinfo->txq))) {
+               struct net_device *dev = skb->dev;
+
+               if (!netif_device_present(dev) || !netif_running(dev)) {
+                       __kfree_skb(skb);
+                       continue;
+               }
+
+               local_irq_save(flags);
+               netif_tx_lock(dev);
+               if (netif_queue_stopped(dev) ||
+                   dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+                       skb_queue_head(&npinfo->txq, skb);
+                       netif_tx_unlock(dev);
+                       local_irq_restore(flags);
+
+                       schedule_delayed_work(&npinfo->tx_work, HZ/10);
+                       return;
+               }
+               netif_tx_unlock(dev);
+               local_irq_restore(flags);
+       }
+}
 
-static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
-                            unsigned short ulen, u32 saddr, u32 daddr)
+static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+                           unsigned short ulen, __be32 saddr, __be32 daddr)
 {
-       if (uh->check == 0)
+       __wsum psum;
+
+       if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
                return 0;
 
-       if (skb->ip_summed == CHECKSUM_HW)
-               return csum_tcpudp_magic(
-                       saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
+       psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE &&
+           !csum_fold(csum_add(psum, skb->csum)))
+               return 0;
 
-       skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+       skb->csum = psum;
 
-       return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+       return __skb_checksum_complete(skb);
 }
 
-void netpoll_poll(struct netpoll *np)
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ *
+ * In cases where there is bi-directional communications, reading only
+ * one message at a time can lead to packets being dropped by the
+ * network adapter, forcing superfluous retries and possibly timeouts.
+ * Thus, we set our budget to greater than 1.
+ */
+static void poll_napi(struct netpoll *np)
 {
-       int budget = 1;
+       struct netpoll_info *npinfo = np->dev->npinfo;
+       int budget = 16;
 
-       if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+       if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
+           npinfo->poll_owner != smp_processor_id() &&
+           spin_trylock(&npinfo->poll_lock)) {
+               npinfo->rx_flags |= NETPOLL_RX_DROP;
+               atomic_inc(&trapped);
+
+               np->dev->poll(np->dev, &budget);
+
+               atomic_dec(&trapped);
+               npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+               spin_unlock(&npinfo->poll_lock);
+       }
+}
+
+static void service_arp_queue(struct netpoll_info *npi)
+{
+       struct sk_buff *skb;
+
+       if (unlikely(!npi))
+               return;
+
+       skb = skb_dequeue(&npi->arp_tx);
+
+       while (skb != NULL) {
+               arp_reply(skb);
+               skb = skb_dequeue(&npi->arp_tx);
+       }
+}
+
+void netpoll_poll(struct netpoll *np)
+{
+       if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
                return;
 
        /* Process pending work on NIC */
        np->dev->poll_controller(np->dev);
+       if (np->dev->poll)
+               poll_napi(np);
+
+       service_arp_queue(np->dev->npinfo);
 
-       /* If scheduling is stopped, tickle NAPI bits */
-       if(trapped && np->dev->poll &&
-          test_bit(__LINK_STATE_RX_SCHED, &np->dev->state))
-               np->dev->poll(np->dev, &budget);
        zap_completion_queue();
 }
 
@@ -81,17 +170,15 @@ static void refill_skbs(void)
        struct sk_buff *skb;
        unsigned long flags;
 
-       spin_lock_irqsave(&skb_list_lock, flags);
-       while (nr_skbs < MAX_SKBS) {
+       spin_lock_irqsave(&skb_pool.lock, flags);
+       while (skb_pool.qlen < MAX_SKBS) {
                skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
                if (!skb)
                        break;
 
-               skb->next = skbs;
-               skbs = skb;
-               nr_skbs++;
+               __skb_queue_tail(&skb_pool, skb);
        }
-       spin_unlock_irqrestore(&skb_list_lock, flags);
+       spin_unlock_irqrestore(&skb_pool.lock, flags);
 }
 
 static void zap_completion_queue(void)
@@ -110,44 +197,35 @@ static void zap_completion_queue(void)
                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
-                       __kfree_skb(skb);
+                       if (skb->destructor)
+                               dev_kfree_skb_any(skb); /* put this one back */
+                       else
+                               __kfree_skb(skb);
                }
        }
 
        put_cpu_var(softnet_data);
 }
 
-static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
+static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
 {
-       int once = 1, count = 0;
-       unsigned long flags;
-       struct sk_buff *skb = NULL;
+       int count = 0;
+       struct sk_buff *skb;
 
        zap_completion_queue();
+       refill_skbs();
 repeat:
-       if (nr_skbs < MAX_SKBS)
-               refill_skbs();
 
        skb = alloc_skb(len, GFP_ATOMIC);
+       if (!skb)
+               skb = skb_dequeue(&skb_pool);
 
        if (!skb) {
-               spin_lock_irqsave(&skb_list_lock, flags);
-               skb = skbs;
-               if (skb)
-                       skbs = skb->next;
-               skb->next = NULL;
-               nr_skbs--;
-               spin_unlock_irqrestore(&skb_list_lock, flags);
-       }
-
-       if(!skb) {
-               count++;
-               if (once && (count == 1000000)) {
-                       printk("out of netpoll skbs!\n");
-                       once = 0;
+               if (++count < 10) {
+                       netpoll_poll(np);
+                       goto repeat;
                }
-               netpoll_poll(np);
-               goto repeat;
+               return NULL;
        }
 
        atomic_set(&skb->users, 1);
@@ -155,27 +233,47 @@ repeat:
        return skb;
 }
 
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
-       int status;
+       int status = NETDEV_TX_BUSY;
+       unsigned long tries;
+       struct net_device *dev = np->dev;
+       struct netpoll_info *npinfo = np->dev->npinfo;
+
+       if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
+               __kfree_skb(skb);
+               return;
+       }
+
+       /* don't get messages out of order, and no recursion */
+       if (skb_queue_len(&npinfo->txq) == 0 &&
+                   npinfo->poll_owner != smp_processor_id()) {
+               unsigned long flags;
 
-repeat:
-       if(!np || !np->dev || !netif_running(np->dev)) {
-               __kfree_skb(skb);
-               return;
-       }
+               local_irq_save(flags);
+               if (netif_tx_trylock(dev)) {
+                       /* try until next clock tick */
+                       for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
+                                       tries > 0; --tries) {
+                               if (!netif_queue_stopped(dev))
+                                       status = dev->hard_start_xmit(skb, dev);
 
-       spin_lock(&np->dev->xmit_lock);
-       np->dev->xmit_lock_owner = smp_processor_id();
+                               if (status == NETDEV_TX_OK)
+                                       break;
 
-       status = np->dev->hard_start_xmit(skb, np->dev);
-       np->dev->xmit_lock_owner = -1;
-       spin_unlock(&np->dev->xmit_lock);
+                               /* tickle device, maybe there is some cleanup */
+                               netpoll_poll(np);
 
-       /* transmit busy */
-       if(status) {
-               netpoll_poll(np);
-               goto repeat;
+                               udelay(USEC_PER_POLL);
+                       }
+                       netif_tx_unlock(dev);
+               }
+               local_irq_restore(flags);
+       }
+
+       if (status != NETDEV_TX_OK) {
+               skb_queue_tail(&npinfo->txq, skb);
+               schedule_delayed_work(&npinfo->tx_work,0);
        }
 }
 
@@ -189,7 +287,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
        udp_len = len + sizeof(*udph);
        ip_len = eth_len = udp_len + sizeof(*iph);
-       total_len = eth_len + ETH_HLEN;
+       total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
 
        skb = find_skb(np, total_len, total_len - len);
        if (!skb)
@@ -198,57 +296,59 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
        memcpy(skb->data, msg, len);
        skb->len += len;
 
-       udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+       skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);
        udph->check = 0;
+       udph->check = csum_tcpudp_magic(htonl(np->local_ip),
+                                       htonl(np->remote_ip),
+                                       udp_len, IPPROTO_UDP,
+                                       csum_partial((unsigned char *)udph, udp_len, 0));
+       if (udph->check == 0)
+               udph->check = CSUM_MANGLED_0;
 
-       iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+       skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
 
-       iph->version  = 4;
-       iph->ihl      = 5;
+       /* iph->version = 4; iph->ihl = 5; */
+       put_unaligned(0x45, (unsigned char *)iph);
        iph->tos      = 0;
-       iph->tot_len  = htons(ip_len);
+       put_unaligned(htons(ip_len), &(iph->tot_len));
        iph->id       = 0;
        iph->frag_off = 0;
        iph->ttl      = 64;
        iph->protocol = IPPROTO_UDP;
        iph->check    = 0;
-       iph->saddr    = htonl(np->local_ip);
-       iph->daddr    = htonl(np->remote_ip);
+       put_unaligned(htonl(np->local_ip), &(iph->saddr));
+       put_unaligned(htonl(np->remote_ip), &(iph->daddr));
        iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
        eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-
-       eth->h_proto = htons(ETH_P_IP);
+       skb->mac.raw = skb->data;
+       skb->protocol = eth->h_proto = htons(ETH_P_IP);
        memcpy(eth->h_source, np->local_mac, 6);
        memcpy(eth->h_dest, np->remote_mac, 6);
 
+       skb->dev = np->dev;
+
        netpoll_send_skb(np, skb);
 }
 
 static void arp_reply(struct sk_buff *skb)
 {
+       struct netpoll_info *npinfo = skb->dev->npinfo;
        struct arphdr *arp;
        unsigned char *arp_ptr;
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
-       u32 sip, tip;
+       __be32 sip, tip;
+       unsigned char *sha;
        struct sk_buff *send_skb;
-       unsigned long flags;
-       struct list_head *p;
-       struct netpoll *np = 0;
-
-       spin_lock_irqsave(&rx_list_lock, flags);
-       list_for_each(p, &rx_list) {
-               np = list_entry(p, struct netpoll, rx_list);
-               if ( np->dev == skb->dev )
-                       break;
-               np = 0;
-       }
-       spin_unlock_irqrestore(&rx_list_lock, flags);
+       struct netpoll *np = NULL;
 
-       if (!np) return;
+       if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+               np = npinfo->rx_np;
+       if (!np)
+               return;
 
        /* No arp on this interface */
        if (skb->dev->flags & IFF_NOARP)
@@ -268,9 +368,14 @@ static void arp_reply(struct sk_buff *skb)
            arp->ar_op != htons(ARPOP_REQUEST))
                return;
 
-       arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
+       arp_ptr = (unsigned char *)(arp+1);
+       /* save the location of the src hw addr */
+       sha = arp_ptr;
+       arp_ptr += skb->dev->addr_len;
        memcpy(&sip, arp_ptr, 4);
-       arp_ptr += 4 + skb->dev->addr_len;
+       arp_ptr += 4;
+       /* if we actually cared about dst hw addr, it would get copied here */
+       arp_ptr += skb->dev->addr_len;
        memcpy(&tip, arp_ptr, 4);
 
        /* Should we ignore arp? */
@@ -293,8 +398,8 @@ static void arp_reply(struct sk_buff *skb)
 
        if (np->dev->hard_header &&
            np->dev->hard_header(send_skb, skb->dev, ptype,
-                                      np->remote_mac, np->local_mac,
-                                      send_skb->len) < 0) {
+                                sha, np->local_mac,
+                                send_skb->len) < 0) {
                kfree_skb(send_skb);
                return;
        }
@@ -317,32 +422,34 @@ static void arp_reply(struct sk_buff *skb)
        arp_ptr += np->dev->addr_len;
        memcpy(arp_ptr, &tip, 4);
        arp_ptr += 4;
-       memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
+       memcpy(arp_ptr, sha, np->dev->addr_len);
        arp_ptr += np->dev->addr_len;
        memcpy(arp_ptr, &sip, 4);
 
        netpoll_send_skb(np, send_skb);
 }
 
-int netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb)
 {
        int proto, len, ulen;
        struct iphdr *iph;
        struct udphdr *uh;
-       struct netpoll *np;
-       struct list_head *p;
-       unsigned long flags;
+       struct netpoll_info *npi = skb->dev->npinfo;
+       struct netpoll *np = npi->rx_np;
 
+       if (!np)
+               goto out;
        if (skb->dev->type != ARPHRD_ETHER)
                goto out;
 
        /* check if netpoll clients need ARP */
-       if (skb->protocol == __constant_htons(ETH_P_ARP) && trapped) {
-               arp_reply(skb);
+       if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+           atomic_read(&trapped)) {
+               skb_queue_tail(&npi->arp_tx, skb);
                return 1;
        }
 
-       proto = ntohs(skb->mac.ethernet->h_proto);
+       proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto != ETH_P_IP)
                goto out;
        if (skb->pkt_type == PACKET_OTHERHOST)
@@ -364,6 +471,13 @@ int netpoll_rx(struct sk_buff *skb)
        if (skb->len < len || len < iph->ihl*4)
                goto out;
 
+       /*
+        * Our transport medium may have padded the buffer out.
+        * Now we trim to the true length of the frame.
+        */
+       if (pskb_trim_rcsum(skb, len))
+               goto out;
+
        if (iph->protocol != IPPROTO_UDP)
                goto out;
 
@@ -373,81 +487,76 @@ int netpoll_rx(struct sk_buff *skb)
 
        if (ulen != len)
                goto out;
-       if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
+       if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+               goto out;
+       if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+               goto out;
+       if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+               goto out;
+       if (np->local_port && np->local_port != ntohs(uh->dest))
                goto out;
 
-       spin_lock_irqsave(&rx_list_lock, flags);
-       list_for_each(p, &rx_list) {
-               np = list_entry(p, struct netpoll, rx_list);
-               if (np->dev && np->dev != skb->dev)
-                       continue;
-               if (np->local_ip && np->local_ip != ntohl(iph->daddr))
-                       continue;
-               if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
-                       continue;
-               if (np->local_port && np->local_port != ntohs(uh->dest))
-                       continue;
-
-               spin_unlock_irqrestore(&rx_list_lock, flags);
+       np->rx_hook(np, ntohs(uh->source),
+                   (char *)(uh+1),
+                   ulen - sizeof(struct udphdr));
 
-               if (np->rx_hook)
-                       np->rx_hook(np, ntohs(uh->source),
-                                   (char *)(uh+1),
-                                   ulen - sizeof(struct udphdr));
+       kfree_skb(skb);
+       return 1;
 
+out:
+       if (atomic_read(&trapped)) {
+               kfree_skb(skb);
                return 1;
        }
-       spin_unlock_irqrestore(&rx_list_lock, flags);
 
-out:
-       return trapped;
+       return 0;
 }
 
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
        char *cur=opt, *delim;
 
-       if(*cur != '@') {
+       if (*cur != '@') {
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->local_port=simple_strtol(cur, 0, 10);
-               cur=delim;
+               *delim = 0;
+               np->local_port = simple_strtol(cur, NULL, 10);
+               cur = delim;
        }
        cur++;
        printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
 
-       if(*cur != '/') {
+       if (*cur != '/') {
                if ((delim = strchr(cur, '/')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->local_ip=ntohl(in_aton(cur));
-               cur=delim;
+               *delim = 0;
+               np->local_ip = ntohl(in_aton(cur));
+               cur = delim;
 
                printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
                       np->name, HIPQUAD(np->local_ip));
        }
        cur++;
 
-       if ( *cur != ',') {
+       if (*cur != ',') {
                /* parse out dev name */
                if ((delim = strchr(cur, ',')) == NULL)
                        goto parse_failed;
-               *delim=0;
+               *delim = 0;
                strlcpy(np->dev_name, cur, sizeof(np->dev_name));
-               cur=delim;
+               cur = delim;
        }
        cur++;
 
        printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
 
-       if ( *cur != '@' ) {
+       if (*cur != '@') {
                /* dst port */
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_port=simple_strtol(cur, 0, 10);
-               cur=delim;
+               *delim = 0;
+               np->remote_port = simple_strtol(cur, NULL, 10);
+               cur = delim;
        }
        cur++;
        printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
@@ -455,42 +564,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
        /* dst ip */
        if ((delim = strchr(cur, '/')) == NULL)
                goto parse_failed;
-       *delim=0;
-       np->remote_ip=ntohl(in_aton(cur));
-       cur=delim+1;
+       *delim = 0;
+       np->remote_ip = ntohl(in_aton(cur));
+       cur = delim + 1;
 
        printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
-                      np->name, HIPQUAD(np->remote_ip));
+              np->name, HIPQUAD(np->remote_ip));
 
-       if( *cur != 0 )
-       {
+       if (*cur != 0) {
                /* MAC address */
                if ((delim = strchr(cur, ':')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_mac[0]=simple_strtol(cur, 0, 16);
-               cur=delim+1;
+               *delim = 0;
+               np->remote_mac[0] = simple_strtol(cur, NULL, 16);
+               cur = delim + 1;
                if ((delim = strchr(cur, ':')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_mac[1]=simple_strtol(cur, 0, 16);
-               cur=delim+1;
+               *delim = 0;
+               np->remote_mac[1] = simple_strtol(cur, NULL, 16);
+               cur = delim + 1;
                if ((delim = strchr(cur, ':')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_mac[2]=simple_strtol(cur, 0, 16);
-               cur=delim+1;
+               *delim = 0;
+               np->remote_mac[2] = simple_strtol(cur, NULL, 16);
+               cur = delim + 1;
                if ((delim = strchr(cur, ':')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_mac[3]=simple_strtol(cur, 0, 16);
-               cur=delim+1;
+               *delim = 0;
+               np->remote_mac[3] = simple_strtol(cur, NULL, 16);
+               cur = delim + 1;
                if ((delim = strchr(cur, ':')) == NULL)
                        goto parse_failed;
-               *delim=0;
-               np->remote_mac[4]=simple_strtol(cur, 0, 16);
-               cur=delim+1;
-               np->remote_mac[5]=simple_strtol(cur, 0, 16);
+               *delim = 0;
+               np->remote_mac[4] = simple_strtol(cur, NULL, 16);
+               cur = delim + 1;
+               np->remote_mac[5] = simple_strtol(cur, NULL, 16);
        }
 
        printk(KERN_INFO "%s: remote ethernet address "
@@ -515,40 +623,67 @@ int netpoll_setup(struct netpoll *np)
 {
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
+       struct netpoll_info *npinfo;
+       unsigned long flags;
+       int err;
 
        if (np->dev_name)
                ndev = dev_get_by_name(np->dev_name);
        if (!ndev) {
                printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
                       np->name, np->dev_name);
-               return -1;
+               return -ENODEV;
        }
+
+       np->dev = ndev;
+       if (!ndev->npinfo) {
+               npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+               if (!npinfo) {
+                       err = -ENOMEM;
+                       goto release;
+               }
+
+               npinfo->rx_flags = 0;
+               npinfo->rx_np = NULL;
+               spin_lock_init(&npinfo->poll_lock);
+               npinfo->poll_owner = -1;
+
+               spin_lock_init(&npinfo->rx_lock);
+               skb_queue_head_init(&npinfo->arp_tx);
+               skb_queue_head_init(&npinfo->txq);
+               INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+               atomic_set(&npinfo->refcnt, 1);
+       } else {
+               npinfo = ndev->npinfo;
+               atomic_inc(&npinfo->refcnt);
+       }
+
        if (!ndev->poll_controller) {
                printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
                       np->name, np->dev_name);
+               err = -ENOTSUPP;
                goto release;
        }
 
-       if (!(ndev->flags & IFF_UP)) {
-               unsigned short oflags;
+       if (!netif_running(ndev)) {
                unsigned long atmost, atleast;
 
                printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
                       np->name, np->dev_name);
 
-               oflags = ndev->flags;
+               rtnl_lock();
+               err = dev_open(ndev);
+               rtnl_unlock();
 
-               rtnl_shlock();
-               if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
+               if (err) {
                        printk(KERN_ERR "%s: failed to open %s\n",
-                              np->name, np->dev_name);
-                       rtnl_shunlock();
+                              np->name, ndev->name);
                        goto release;
                }
-               rtnl_shunlock();
 
                atleast = jiffies + HZ/10;
-               atmost = jiffies + 10*HZ;
+               atmost = jiffies + 4*HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                printk(KERN_NOTICE
@@ -559,78 +694,116 @@ int netpoll_setup(struct netpoll *np)
                        cond_resched();
                }
 
+               /* If carrier appears to come up instantly, we don't
+                * trust it and pause so that we don't pump all our
+                * queued console messages into the bitbucket.
+                */
+
                if (time_before(jiffies, atleast)) {
-                       printk(KERN_NOTICE "%s: carrier detect appears flaky,"
-                              " waiting 10 seconds\n",
+                       printk(KERN_NOTICE "%s: carrier detect appears"
+                              " untrustworthy, waiting 4 seconds\n",
                               np->name);
-                       while (time_before(jiffies, atmost))
-                               cond_resched();
+                       msleep(4000);
                }
        }
 
-       if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
+       if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
                memcpy(np->local_mac, ndev->dev_addr, 6);
 
        if (!np->local_ip) {
-               in_dev = in_dev_get(ndev);
+               rcu_read_lock();
+               in_dev = __in_dev_get_rcu(ndev);
 
-               if (!in_dev) {
+               if (!in_dev || !in_dev->ifa_list) {
+                       rcu_read_unlock();
                        printk(KERN_ERR "%s: no IP address for %s, aborting\n",
                               np->name, np->dev_name);
+                       err = -EDESTADDRREQ;
                        goto release;
                }
 
                np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
-               in_dev_put(in_dev);
+               rcu_read_unlock();
                printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
                       np->name, HIPQUAD(np->local_ip));
        }
 
-       np->dev = ndev;
+       if (np->rx_hook) {
+               spin_lock_irqsave(&npinfo->rx_lock, flags);
+               npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+               npinfo->rx_np = np;
+               spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+       }
 
-       if(np->rx_hook) {
-               unsigned long flags;
+       /* fill up the skb queue */
+       refill_skbs();
 
-#ifdef CONFIG_NETPOLL_RX
-               np->dev->netpoll_rx = 1;
-#endif
+       /* last thing to do is link it to the net device structure */
+       ndev->npinfo = npinfo;
 
-               spin_lock_irqsave(&rx_list_lock, flags);
-               list_add(&np->rx_list, &rx_list);
-               spin_unlock_irqrestore(&rx_list_lock, flags);
-       }
+       /* avoid racing with NAPI reading npinfo */
+       synchronize_rcu();
 
        return 0;
+
  release:
+       if (!ndev->npinfo)
+               kfree(npinfo);
+       np->dev = NULL;
        dev_put(ndev);
-       return -1;
+       return err;
 }
 
+static int __init netpoll_init(void)
+{
+       skb_queue_head_init(&skb_pool);
+       return 0;
+}
+core_initcall(netpoll_init);
+
 void netpoll_cleanup(struct netpoll *np)
 {
-       if(np->rx_hook) {
-               unsigned long flags;
+       struct netpoll_info *npinfo;
+       unsigned long flags;
+
+       if (np->dev) {
+               npinfo = np->dev->npinfo;
+               if (npinfo) {
+                       if (npinfo->rx_np == np) {
+                               spin_lock_irqsave(&npinfo->rx_lock, flags);
+                               npinfo->rx_np = NULL;
+                               npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+                               spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+                       }
+
+                       np->dev->npinfo = NULL;
+                       if (atomic_dec_and_test(&npinfo->refcnt)) {
+                               skb_queue_purge(&npinfo->arp_tx);
+                               skb_queue_purge(&npinfo->txq);
+                               cancel_rearming_delayed_work(&npinfo->tx_work);
+                               flush_scheduled_work();
+
+                               kfree(npinfo);
+                       }
+               }
 
-               spin_lock_irqsave(&rx_list_lock, flags);
-               list_del(&np->rx_list);
-#ifdef CONFIG_NETPOLL_RX
-               np->dev->netpoll_rx = 0;
-#endif
-               spin_unlock_irqrestore(&rx_list_lock, flags);
+               dev_put(np->dev);
        }
 
-       dev_put(np->dev);
-       np->dev = 0;
+       np->dev = NULL;
 }
 
 int netpoll_trap(void)
 {
-       return trapped;
+       return atomic_read(&trapped);
 }
 
 void netpoll_set_trap(int trap)
 {
-       trapped = trap;
+       if (trap)
+               atomic_inc(&trapped);
+       else
+               atomic_dec(&trapped);
 }
 
 EXPORT_SYMBOL(netpoll_set_trap);
@@ -638,6 +811,5 @@ EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
-EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_send_udp);
 EXPORT_SYMBOL(netpoll_poll);