X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fcore%2Fnetpoll.c;h=522e44173e56fb496994714bbd65e53959368e87;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=ea51f8d02eb8654d0533afd7bc5eb38a0e08c7fd;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/net/core/netpoll.c b/net/core/netpoll.c index ea51f8d02..522e44173 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -34,18 +34,12 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) -#define MAX_RETRIES 20000 -static DEFINE_SPINLOCK(skb_list_lock); -static int nr_skbs; -static struct sk_buff *skbs; - -static DEFINE_SPINLOCK(queue_lock); -static int queue_depth; -static struct sk_buff *queue_head, *queue_tail; +static struct sk_buff_head skb_pool; static atomic_t trapped; +#define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -54,63 +48,51 @@ static atomic_t trapped; sizeof(struct iphdr) + sizeof(struct ethhdr)) static void zap_completion_queue(void); +static void arp_reply(struct sk_buff *skb); -static void queue_process(void *p) +static void queue_process(struct work_struct *work) { - unsigned long flags; + struct netpoll_info *npinfo = + container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; - - while (queue_head) { - spin_lock_irqsave(&queue_lock, flags); - - skb = queue_head; - queue_head = skb->next; - if (skb == queue_tail) - queue_head = NULL; - - queue_depth--; - - spin_unlock_irqrestore(&queue_lock, flags); - - dev_queue_xmit(skb); - } -} - -static DECLARE_WORK(send_queue, queue_process, NULL); - -void netpoll_queue(struct sk_buff *skb) -{ unsigned long flags; - if (queue_depth == MAX_QUEUE_DEPTH) { - __kfree_skb(skb); - return; - } + while ((skb = skb_dequeue(&npinfo->txq))) { + struct net_device *dev = skb->dev; - spin_lock_irqsave(&queue_lock, flags); - if (!queue_head) - queue_head = skb; - else - queue_tail->next = skb; - queue_tail = skb; - queue_depth++; - spin_unlock_irqrestore(&queue_lock, flags); + if (!netif_device_present(dev) || !netif_running(dev)) { + __kfree_skb(skb); + continue; + } - schedule_work(&send_queue); + local_irq_save(flags); + netif_tx_lock(dev); + if (netif_queue_stopped(dev) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + skb_queue_head(&npinfo->txq, skb); + netif_tx_unlock(dev); + local_irq_restore(flags); + + schedule_delayed_work(&npinfo->tx_work, HZ/10); + return; + } + netif_tx_unlock(dev); + local_irq_restore(flags); + } } -static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) +static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, + unsigned short ulen, __be32 saddr, __be32 daddr) { - unsigned int psum; + __wsum psum; if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) return 0; psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - if (skb->ip_summed == CHECKSUM_HW && - !(u16)csum_fold(csum_add(psum, skb->csum))) + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_fold(csum_add(psum, skb->csum))) return 0; skb->csum = psum; @@ -153,9 +135,24 @@ static void poll_napi(struct netpoll *np) } } +static void service_arp_queue(struct netpoll_info *npi) +{ + struct sk_buff *skb; + + if (unlikely(!npi)) + return; + + skb = skb_dequeue(&npi->arp_tx); + + while (skb != NULL) { + arp_reply(skb); + skb = skb_dequeue(&npi->arp_tx); + } +} + void netpoll_poll(struct netpoll *np) { - if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) + if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) return; /* Process pending work on NIC */ @@ -163,6 +160,8 @@ void netpoll_poll(struct netpoll *np) if (np->dev->poll) poll_napi(np); + service_arp_queue(np->dev->npinfo); + zap_completion_queue(); } @@ -171,17 +170,15 @@ static void refill_skbs(void) struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_list_lock, flags); - while (nr_skbs < MAX_SKBS) { + spin_lock_irqsave(&skb_pool.lock, flags); + while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - skb->next = skbs; - skbs = skb; - nr_skbs++; + __skb_queue_tail(&skb_pool, skb); } - spin_unlock_irqrestore(&skb_list_lock, flags); + spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) @@ -200,7 +197,7 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if(skb->destructor) + if (skb->destructor) dev_kfree_skb_any(skb); /* put this one back */ else __kfree_skb(skb); @@ -210,38 +207,25 @@ static void zap_completion_queue(void) put_cpu_var(softnet_data); } -static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) +static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { - int once = 1, count = 0; - unsigned long flags; - struct sk_buff *skb = NULL; + int count = 0; + struct sk_buff *skb; zap_completion_queue(); + refill_skbs(); repeat: - if (nr_skbs < MAX_SKBS) - refill_skbs(); skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + skb = skb_dequeue(&skb_pool); if (!skb) { - spin_lock_irqsave(&skb_list_lock, flags); - skb = skbs; - if (skb) { - skbs = skb->next; - skb->next = NULL; - nr_skbs--; - } - spin_unlock_irqrestore(&skb_list_lock, flags); - } - - if(!skb) { - count++; - if (once && (count == 1000000)) { - printk("out of netpoll skbs!\n"); - once = 0; + if (++count < 10) { + netpoll_poll(np); + goto repeat; } - netpoll_poll(np); - goto repeat; + return NULL; } atomic_set(&skb->users, 1); @@ -251,57 +235,46 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { - int status; - struct netpoll_info *npinfo; + int status = NETDEV_TX_BUSY; + unsigned long tries; + struct net_device *dev = np->dev; + struct netpoll_info *npinfo = np->dev->npinfo; - if (!np || !np->dev || !netif_running(np->dev)) { - __kfree_skb(skb); - return; - } + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + __kfree_skb(skb); + return; + } - npinfo = np->dev->npinfo; + /* don't get messages out of order, and no recursion */ + if (skb_queue_len(&npinfo->txq) == 0 && + npinfo->poll_owner != smp_processor_id()) { + unsigned long flags; - /* avoid recursion */ - if (npinfo->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { - if (np->drop) - np->drop(skb); - else - __kfree_skb(skb); - return; - } + local_irq_save(flags); + if (netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - do { - npinfo->tries--; - spin_lock(&np->dev->xmit_lock); - np->dev->xmit_lock_owner = smp_processor_id(); + if (status == NETDEV_TX_OK) + break; - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - if (netif_queue_stopped(np->dev)) { - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); - netpoll_poll(np); - udelay(50); - continue; - } - - status = np->dev->hard_start_xmit(skb, np->dev); - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - /* success */ - if(!status) { - npinfo->tries = MAX_RETRIES; /* reset */ - return; + udelay(USEC_PER_POLL); + } + netif_tx_unlock(dev); } + local_irq_restore(flags); + } - /* transmit busy */ - netpoll_poll(np); - udelay(50); - } while (npinfo->tries > 0); + if (status != NETDEV_TX_OK) { + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -323,13 +296,19 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) memcpy(skb->data, msg, len); skb->len += len; - udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); + skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); udph->check = 0; + udph->check = csum_tcpudp_magic(htonl(np->local_ip), + htonl(np->remote_ip), + udp_len, IPPROTO_UDP, + csum_partial((unsigned char *)udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; - iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); + skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); /* iph->version = 4; iph->ihl = 5; */ put_unaligned(0x45, (unsigned char *)iph); @@ -345,8 +324,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - - eth->h_proto = htons(ETH_P_IP); + skb->mac.raw = skb->data; + skb->protocol = eth->h_proto = htons(ETH_P_IP); memcpy(eth->h_source, np->local_mac, 6); memcpy(eth->h_dest, np->remote_mac, 6); @@ -361,7 +340,8 @@ static void arp_reply(struct sk_buff *skb) struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; - u32 sip, tip; + __be32 sip, tip; + unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np = NULL; @@ -388,9 +368,14 @@ static void arp_reply(struct sk_buff *skb) arp->ar_op != htons(ARPOP_REQUEST)) return; - arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); - arp_ptr += 4 + skb->dev->addr_len; + arp_ptr += 4; + /* if we actually cared about dst hw addr, it would get copied here */ + arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ @@ -413,8 +398,8 @@ static void arp_reply(struct sk_buff *skb) if (np->dev->hard_header && np->dev->hard_header(send_skb, skb->dev, ptype, - np->remote_mac, np->local_mac, - send_skb->len) < 0) { + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -437,7 +422,7 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &tip, 4); arp_ptr += 4; - memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); + memcpy(arp_ptr, sha, np->dev->addr_len); arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &sip, 4); @@ -449,7 +434,8 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->npinfo->rx_np; + struct netpoll_info *npi = skb->dev->npinfo; + struct netpoll *np = npi->rx_np; if (!np) goto out; @@ -459,7 +445,7 @@ int __netpoll_rx(struct sk_buff *skb) /* check if netpoll clients need ARP */ if (skb->protocol == __constant_htons(ETH_P_ARP) && atomic_read(&trapped)) { - arp_reply(skb); + skb_queue_tail(&npi->arp_tx, skb); return 1; } @@ -485,6 +471,13 @@ int __netpoll_rx(struct sk_buff *skb) if (skb->len < len || len < iph->ihl*4) goto out; + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. + */ + if (pskb_trim_rcsum(skb, len)) + goto out; + if (iph->protocol != IPPROTO_UDP) goto out; @@ -523,47 +516,47 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; - if(*cur != '@') { + if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->local_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->local_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); - if(*cur != '/') { + if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->local_ip=ntohl(in_aton(cur)); - cur=delim; + *delim = 0; + np->local_ip = ntohl(in_aton(cur)); + cur = delim; printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->local_ip)); } cur++; - if ( *cur != ',') { + if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; - *delim=0; + *delim = 0; strlcpy(np->dev_name, cur, sizeof(np->dev_name)); - cur=delim; + cur = delim; } cur++; printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if ( *cur != '@' ) { + if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->remote_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->remote_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); @@ -571,42 +564,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt) /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->remote_ip=ntohl(in_aton(cur)); - cur=delim+1; + *delim = 0; + np->remote_ip = ntohl(in_aton(cur)); + cur = delim + 1; printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); + np->name, HIPQUAD(np->remote_ip)); - if( *cur != 0 ) - { + if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[0]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[0] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[1]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[1] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[2]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[2] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[3]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[3] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[4]=simple_strtol(cur, NULL, 16); - cur=delim+1; - np->remote_mac[5]=simple_strtol(cur, NULL, 16); + *delim = 0; + np->remote_mac[4] = simple_strtol(cur, NULL, 16); + cur = delim + 1; + np->remote_mac[5] = simple_strtol(cur, NULL, 16); } printk(KERN_INFO "%s: remote ethernet address " @@ -633,33 +625,44 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; struct netpoll_info *npinfo; unsigned long flags; + int err; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - return -1; + return -ENODEV; } np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); - if (!npinfo) + if (!npinfo) { + err = -ENOMEM; goto release; + } npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; - npinfo->tries = MAX_RETRIES; + spin_lock_init(&npinfo->rx_lock); - } else + skb_queue_head_init(&npinfo->arp_tx); + skb_queue_head_init(&npinfo->txq); + INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); + + atomic_set(&npinfo->refcnt, 1); + } else { npinfo = ndev->npinfo; + atomic_inc(&npinfo->refcnt); + } if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); + err = -ENOTSUPP; goto release; } @@ -669,14 +672,15 @@ int netpoll_setup(struct netpoll *np) printk(KERN_INFO "%s: device %s not up yet, forcing it\n", np->name, np->dev_name); - rtnl_shlock(); - if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { + rtnl_lock(); + err = dev_open(ndev); + rtnl_unlock(); + + if (err) { printk(KERN_ERR "%s: failed to open %s\n", - np->name, np->dev_name); - rtnl_shunlock(); + np->name, ndev->name); goto release; } - rtnl_shunlock(); atleast = jiffies + HZ/10; atmost = jiffies + 4*HZ; @@ -714,6 +718,7 @@ int netpoll_setup(struct netpoll *np) rcu_read_unlock(); printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); + err = -EDESTADDRREQ; goto release; } @@ -746,8 +751,15 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); np->dev = NULL; dev_put(ndev); - return -1; + return err; +} + +static int __init netpoll_init(void) +{ + skb_queue_head_init(&skb_pool); + return 0; } +core_initcall(netpoll_init); void netpoll_cleanup(struct netpoll *np) { @@ -756,12 +768,25 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; - if (npinfo && npinfo->rx_np == np) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + if (npinfo) { + if (npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + np->dev->npinfo = NULL; + if (atomic_dec_and_test(&npinfo->refcnt)) { + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); + flush_scheduled_work(); + + kfree(npinfo); + } } + dev_put(np->dev); } @@ -788,4 +813,3 @@ EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); EXPORT_SYMBOL(netpoll_poll); -EXPORT_SYMBOL(netpoll_queue);