X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fcore%2Fnetpoll.c;h=e8e05cebd95adaeee351c84144d6c51250b503e9;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=2f0115eef7ca685325c3b3b130f04dd467523ad8;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2f0115eef..e8e05cebd 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -13,30 +13,41 @@
 #include <linux/smp_lock.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/string.h>
 #include <linux/inetdevice.h>
 #include <linux/inet.h>
 #include <linux/interrupt.h>
 #include <linux/netpoll.h>
 #include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <asm/unaligned.h>
 
 /*
  * We maintain a small pool of fully-sized skbs, to make sure the
  * message gets out even in extreme OOM situations.
  */
 
-#define MAX_SKBS 32
 #define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+#define MAX_RETRIES 20000
 
-static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(skb_list_lock);
 static int nr_skbs;
 static struct sk_buff *skbs;
 
-static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
-static LIST_HEAD(rx_list);
+static DEFINE_SPINLOCK(queue_lock);
+static int queue_depth;
+static struct sk_buff *queue_head, *queue_tail;
+
+static atomic_t trapped;
 
-static int trapped;
+#define NETPOLL_RX_ENABLED 1
+#define NETPOLL_RX_DROP 2
 
 #define MAX_SKB_SIZE \
 	(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -44,35 +55,114 @@ static int trapped;
 
 static void zap_completion_queue(void);
 
+static void queue_process(void *p)
+{
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	while (queue_head) {
+		spin_lock_irqsave(&queue_lock, flags);
+
+		skb = queue_head;
+		queue_head = skb->next;
+		if (skb == queue_tail)
+			queue_head = NULL;
+
+		queue_depth--;
+
+		spin_unlock_irqrestore(&queue_lock, flags);
+
+		dev_queue_xmit(skb);
+	}
+}
+
+static DECLARE_WORK(send_queue, queue_process, NULL);
+
+void netpoll_queue(struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	if (queue_depth == MAX_QUEUE_DEPTH) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&queue_lock, flags);
+	if (!queue_head)
+		queue_head = skb;
+	else
+		queue_tail->next = skb;
+	queue_tail = skb;
+	queue_depth++;
+	spin_unlock_irqrestore(&queue_lock, flags);
+
+	schedule_work(&send_queue);
+}
+
 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 			unsigned short ulen, u32 saddr, u32 daddr)
 {
-	if (uh->check == 0)
+	unsigned int psum;
+
+	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
 		return 0;
 
-	if (skb->ip_summed == CHECKSUM_HW)
-		return csum_tcpudp_magic(
-			saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
+	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
-	skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+	if (skb->ip_summed == CHECKSUM_HW &&
+	    !(u16)csum_fold(csum_add(psum, skb->csum)))
+		return 0;
+
+	skb->csum = psum;
 
-	return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	return __skb_checksum_complete(skb);
 }
 
-void netpoll_poll(struct netpoll *np)
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ * + * In cases where there is bi-directional communications, reading only + * one message at a time can lead to packets being dropped by the + * network adapter, forcing superfluous retries and possibly timeouts. + * Thus, we set our budget to greater than 1. + */ +static void poll_napi(struct netpoll *np) { - int budget = 1; + struct netpoll_info *npinfo = np->dev->npinfo; + int budget = 16; + + if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && + npinfo->poll_owner != smp_processor_id() && + spin_trylock(&npinfo->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); + np->dev->poll(np->dev, &budget); + + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&npinfo->poll_lock); + } +} + +void netpoll_poll(struct netpoll *np) +{ if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) return; /* Process pending work on NIC */ np->dev->poll_controller(np->dev); + if (np->dev->poll) + poll_napi(np); - /* If scheduling is stopped, tickle NAPI bits */ - if(trapped && np->dev->poll && - test_bit(__LINK_STATE_RX_SCHED, &np->dev->state)) - np->dev->poll(np->dev, &budget); zap_completion_queue(); } @@ -110,7 +200,10 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - __kfree_skb(skb); + if(skb->destructor) + dev_kfree_skb_any(skb); /* put this one back */ + else + __kfree_skb(skb); } } @@ -133,10 +226,11 @@ repeat: if (!skb) { spin_lock_irqsave(&skb_list_lock, flags); skb = skbs; - if (skb) + if (skb) { skbs = skb->next; - skb->next = NULL; - nr_skbs--; + skb->next = NULL; + nr_skbs--; + } spin_unlock_irqrestore(&skb_list_lock, flags); } @@ -155,28 +249,59 @@ repeat: return skb; } -void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status; + struct netpoll_info *npinfo; -repeat: - if(!np || !np->dev || !netif_running(np->dev)) { + if (!np || !np->dev || !netif_running(np->dev)) { __kfree_skb(skb); return; } - spin_lock(&np->dev->xmit_lock); - np->dev->xmit_lock_owner = smp_processor_id(); + npinfo = np->dev->npinfo; - status = np->dev->hard_start_xmit(skb, np->dev); - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + /* avoid recursion */ + if (npinfo->poll_owner == smp_processor_id() || + np->dev->xmit_lock_owner == smp_processor_id()) { + if (np->drop) + np->drop(skb); + else + __kfree_skb(skb); + return; + } + + do { + npinfo->tries--; + spin_lock(&np->dev->xmit_lock); + np->dev->xmit_lock_owner = smp_processor_id(); + + /* + * network drivers do not expect to be called if the queue is + * stopped. 
+		 */
+		if (netif_queue_stopped(np->dev)) {
+			np->dev->xmit_lock_owner = -1;
+			spin_unlock(&np->dev->xmit_lock);
+			netpoll_poll(np);
+			udelay(50);
+			continue;
+		}
 
-	/* transmit busy */
-	if(status) {
+		status = np->dev->hard_start_xmit(skb, np->dev);
+		np->dev->xmit_lock_owner = -1;
+		spin_unlock(&np->dev->xmit_lock);
+
+		/* success */
+		if(!status) {
+			npinfo->tries = MAX_RETRIES; /* reset */
+			return;
+		}
+
+		/* transmit busy */
 		netpoll_poll(np);
-		goto repeat;
-	}
+		udelay(50);
+	} while (npinfo->tries > 0);
 }
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -189,7 +314,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 	udp_len = len + sizeof(*udph);
 	ip_len = eth_len = udp_len + sizeof(*iph);
-	total_len = eth_len + ETH_HLEN;
+	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
 
 	skb = find_skb(np, total_len, total_len - len);
 	if (!skb)
@@ -206,17 +331,17 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 	iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
 
-	iph->version = 4;
-	iph->ihl = 5;
+	/* iph->version = 4; iph->ihl = 5; */
+	put_unaligned(0x45, (unsigned char *)iph);
 	iph->tos = 0;
-	iph->tot_len = htons(ip_len);
+	put_unaligned(htons(ip_len), &(iph->tot_len));
 	iph->id = 0;
 	iph->frag_off = 0;
 	iph->ttl = 64;
 	iph->protocol = IPPROTO_UDP;
 	iph->check = 0;
-	iph->saddr = htonl(np->local_ip);
-	iph->daddr = htonl(np->remote_ip);
+	put_unaligned(htonl(np->local_ip), &(iph->saddr));
+	put_unaligned(htonl(np->remote_ip), &(iph->daddr));
 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
 	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
@@ -225,30 +350,25 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	memcpy(eth->h_source, np->local_mac, 6);
 	memcpy(eth->h_dest, np->remote_mac, 6);
 
+	skb->dev = np->dev;
+
 	netpoll_send_skb(np, skb);
 }
 
 static void arp_reply(struct sk_buff *skb)
 {
+	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
 	u32 sip, tip;
 	struct sk_buff *send_skb;
-	unsigned long flags;
-	struct list_head *p;
-	struct netpoll *np = 0;
-
-	spin_lock_irqsave(&rx_list_lock, flags);
-	list_for_each(p, &rx_list) {
-		np = list_entry(p, struct netpoll, rx_list);
-		if ( np->dev == skb->dev )
-			break;
-		np = 0;
-	}
-	spin_unlock_irqrestore(&rx_list_lock, flags);
+	struct netpoll *np = NULL;
 
-	if (!np) return;
+	if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+		np = npinfo->rx_np;
+	if (!np)
+		return;
 
 	/* No arp on this interface */
 	if (skb->dev->flags & IFF_NOARP)
@@ -324,25 +444,26 @@ static void arp_reply(struct sk_buff *skb)
 	netpoll_send_skb(np, send_skb);
 }
 
-int netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb)
 {
 	int proto, len, ulen;
 	struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll *np;
-	struct list_head *p;
-	unsigned long flags;
+	struct netpoll *np = skb->dev->npinfo->rx_np;
 
+	if (!np)
+		goto out;
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
 
 	/* check if netpoll clients need ARP */
-	if (skb->protocol == __constant_htons(ETH_P_ARP) && trapped) {
+	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	    atomic_read(&trapped)) {
 		arp_reply(skb);
 		return 1;
 	}
 
-	proto = ntohs(skb->mac.ethernet->h_proto);
+	proto = ntohs(eth_hdr(skb)->h_proto);
 	if (proto != ETH_P_IP)
 		goto out;
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto out;
@@ -373,34 +494,29 @@ int netpoll_rx(struct sk_buff *skb)
 	if (ulen != len)
 		goto out;
 
-	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+		goto out;
+	if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+		goto out;
+	if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+		goto out;
+	if (np->local_port && np->local_port != ntohs(uh->dest))
 		goto out;
 
-	spin_lock_irqsave(&rx_list_lock, flags);
-	list_for_each(p, &rx_list) {
-		np = list_entry(p, struct netpoll, rx_list);
-		if (np->dev && np->dev != skb->dev)
-			continue;
-		if (np->local_ip && np->local_ip != ntohl(iph->daddr))
-			continue;
-		if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
-			continue;
-		if (np->local_port && np->local_port != ntohs(uh->dest))
-			continue;
-
-		spin_unlock_irqrestore(&rx_list_lock, flags);
+	np->rx_hook(np, ntohs(uh->source),
+		    (char *)(uh+1),
+		    ulen - sizeof(struct udphdr));
 
-		if (np->rx_hook)
-			np->rx_hook(np, ntohs(uh->source),
-				    (char *)(uh+1),
-				    ulen - sizeof(struct udphdr));
+	kfree_skb(skb);
+	return 1;
 
+out:
+	if (atomic_read(&trapped)) {
+		kfree_skb(skb);
 		return 1;
 	}
-	spin_unlock_irqrestore(&rx_list_lock, flags);
 
-out:
-	return trapped;
+	return 0;
 }
 
 int netpoll_parse_options(struct netpoll *np, char *opt)
@@ -411,7 +527,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '@')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->local_port=simple_strtol(cur, 0, 10);
+		np->local_port=simple_strtol(cur, NULL, 10);
 		cur=delim;
 	}
 	cur++;
@@ -446,7 +562,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '@')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_port=simple_strtol(cur, 0, 10);
+		np->remote_port=simple_strtol(cur, NULL, 10);
 		cur=delim;
 	}
 	cur++;
@@ -468,29 +584,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, ':')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_mac[0]=simple_strtol(cur, 0, 16);
+		np->remote_mac[0]=simple_strtol(cur, NULL, 16);
 		cur=delim+1;
 		if ((delim = strchr(cur, ':')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_mac[1]=simple_strtol(cur, 0, 16);
+		np->remote_mac[1]=simple_strtol(cur, NULL, 16);
 		cur=delim+1;
 		if ((delim = strchr(cur, ':')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_mac[2]=simple_strtol(cur, 0, 16);
+		np->remote_mac[2]=simple_strtol(cur, NULL, 16);
 		cur=delim+1;
 		if ((delim = strchr(cur, ':')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_mac[3]=simple_strtol(cur, 0, 16);
+		np->remote_mac[3]=simple_strtol(cur, NULL, 16);
 		cur=delim+1;
 		if ((delim = strchr(cur, ':')) == NULL)
 			goto parse_failed;
 		*delim=0;
-		np->remote_mac[4]=simple_strtol(cur, 0, 16);
+		np->remote_mac[4]=simple_strtol(cur, NULL, 16);
 		cur=delim+1;
-		np->remote_mac[5]=simple_strtol(cur, 0, 16);
+		np->remote_mac[5]=simple_strtol(cur, NULL, 16);
 	}
 
 	printk(KERN_INFO "%s: remote ethernet address "
@@ -515,6 +631,8 @@ int netpoll_setup(struct netpoll *np)
 {
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
+	struct netpoll_info *npinfo;
+	unsigned long flags;
 
 	if (np->dev_name)
 		ndev = dev_get_by_name(np->dev_name);
@@ -523,32 +641,45 @@ int netpoll_setup(struct netpoll *np)
 		       np->name, np->dev_name);
 		return -1;
 	}
+
+	np->dev = ndev;
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo)
+			goto release;
+
+		npinfo->rx_flags = 0;
+		npinfo->rx_np = NULL;
+		spin_lock_init(&npinfo->poll_lock);
+		npinfo->poll_owner = -1;
+		npinfo->tries = MAX_RETRIES;
+		spin_lock_init(&npinfo->rx_lock);
+	} else
+		npinfo = ndev->npinfo;
+
 	if (!ndev->poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		goto release;
 	}
 
-	if (!(ndev->flags & IFF_UP)) {
-		unsigned short oflags;
+	if (!netif_running(ndev)) {
 		unsigned long atmost, atleast;
 
 		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
 		       np->name, np->dev_name);
 
-		oflags = ndev->flags;
-
-		rtnl_shlock();
-		if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
+		rtnl_lock();
+		if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
 			printk(KERN_ERR "%s: failed to open %s\n",
 			       np->name, np->dev_name);
-			rtnl_shunlock();
+			rtnl_unlock();
 			goto release;
 		}
-		rtnl_shunlock();
+		rtnl_unlock();
 
 		atleast = jiffies + HZ/10;
-		atmost = jiffies + 10*HZ;
+		atmost = jiffies + 4*HZ;
 		while (!netif_carrier_ok(ndev)) {
 			if (time_after(jiffies, atmost)) {
 				printk(KERN_NOTICE
@@ -559,78 +690,95 @@ int netpoll_setup(struct netpoll *np)
 			cond_resched();
 		}
 
+		/* If carrier appears to come up instantly, we don't
+		 * trust it and pause so that we don't pump all our
+		 * queued console messages into the bitbucket.
+		 */
+
 		if (time_before(jiffies, atleast)) {
-			printk(KERN_NOTICE "%s: carrier detect appears flaky,"
-			       " waiting 10 seconds\n",
+			printk(KERN_NOTICE "%s: carrier detect appears"
+			       " untrustworthy, waiting 4 seconds\n",
 			       np->name);
-			while (time_before(jiffies, atmost))
-				cond_resched();
+			msleep(4000);
		}
 	}
 
-	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
+	if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
 		memcpy(np->local_mac, ndev->dev_addr, 6);
 
 	if (!np->local_ip) {
-		in_dev = in_dev_get(ndev);
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(ndev);
 
-		if (!in_dev) {
+		if (!in_dev || !in_dev->ifa_list) {
+			rcu_read_unlock();
 			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
 			       np->name, np->dev_name);
 			goto release;
 		}
 
 		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
-		in_dev_put(in_dev);
+		rcu_read_unlock();
 
 		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
 		       np->name, HIPQUAD(np->local_ip));
 	}
 
-	np->dev = ndev;
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		npinfo->rx_np = np;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
 
-	if(np->rx_hook) {
-		unsigned long flags;
+	/* fill up the skb queue */
+	refill_skbs();
 
-#ifdef CONFIG_NETPOLL_RX
-		np->dev->netpoll_rx = 1;
-#endif
+	/* last thing to do is link it to the net device structure */
+	ndev->npinfo = npinfo;
 
-		spin_lock_irqsave(&rx_list_lock, flags);
-		list_add(&np->rx_list, &rx_list);
-		spin_unlock_irqrestore(&rx_list_lock, flags);
-	}
+	/* avoid racing with NAPI reading npinfo */
+	synchronize_rcu();
 
 	return 0;
+
 release:
+	if (!ndev->npinfo)
+		kfree(npinfo);
+	np->dev = NULL;
 	dev_put(ndev);
 	return -1;
 }
 
 void netpoll_cleanup(struct netpoll *np)
 {
-	if(np->rx_hook) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&rx_list_lock, flags);
-		list_del(&np->rx_list);
-#ifdef CONFIG_NETPOLL_RX
-		np->dev->netpoll_rx = 0;
-#endif
-		spin_unlock_irqrestore(&rx_list_lock, flags);
+	struct netpoll_info *npinfo;
+	unsigned long flags;
+
+	if (np->dev) {
+		npinfo = np->dev->npinfo;
+		if (npinfo && npinfo->rx_np == np) {
+			spin_lock_irqsave(&npinfo->rx_lock, flags);
+			npinfo->rx_np = NULL;
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+			spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+		}
+		dev_put(np->dev);
 	}
 
-	dev_put(np->dev);
-	np->dev = 0;
+	np->dev = NULL;
 }
 
-int netpoll_trap()
+int netpoll_trap(void)
 {
-	return trapped;
+	return atomic_read(&trapped);
 }
 
 void netpoll_set_trap(int trap)
 {
-	trapped = trap;
+	if (trap)
+		atomic_inc(&trapped);
+	else
+		atomic_dec(&trapped);
 }
 
 EXPORT_SYMBOL(netpoll_set_trap);
@@ -638,6 +786,6 @@ EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
-EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_send_udp);
 EXPORT_SYMBOL(netpoll_poll);
+EXPORT_SYMBOL(netpoll_queue);
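
Usage note: netpoll_parse_options() above consumes the same option string
syntax that netconsole documents,

    [src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]

so a typical boot parameter looks like

    netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc

which sends console output from UDP port 4444 on 10.0.0.1 via eth1 to
10.0.0.2 port 9353 at the given destination MAC. Fields left empty keep
whatever the caller preloaded into struct netpoll before parsing;
netconsole preloads ports 6665/6666, eth0 and a broadcast remote MAC,
while netpoll_setup() above fills in local_ip and local_mac from the
device itself when they are still zero.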
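
A minimal client of this API would look roughly like the sketch below.
This is illustrative only, written against the 2.6-era interfaces visible
in this patch (netpoll_setup(), netpoll_send_udp(), netpoll_cleanup());
the module name, addresses and ports are invented:

    /* np_demo: hypothetical netpoll client, not part of this patch */
    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/netpoll.h>

    static struct netpoll np = {
            .name        = "np_demo",
            .dev_name    = "eth0",
            .local_port  = 6665,
            .remote_port = 6666,
            /* 10.0.0.2 in host order; netpoll_send_udp() applies htonl() */
            .remote_ip   = 0x0a000002,
            .remote_mac  = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    };

    static int __init np_demo_init(void)
    {
            /* Brings the device up if necessary, resolves local_ip and
             * local_mac, allocates ndev->npinfo and primes the skb pool. */
            if (netpoll_setup(&np))
                    return -ENODEV;

            netpoll_send_udp(&np, "np_demo loaded\n", 15);
            return 0;
    }

    static void __exit np_demo_exit(void)
    {
            netpoll_cleanup(&np);
    }

    module_init(np_demo_init);
    module_exit(np_demo_exit);
    MODULE_LICENSE("GPL");

Note also that netpoll_queue() matches the signature of the np->drop hook
tested in netpoll_send_skb(): a client that cannot afford to lose packets
when a send is attempted recursively can set .drop = netpoll_queue,
deferring the skb to keventd for retransmission via dev_queue_xmit()
instead of freeing it.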
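
The reworked checksum_udp() leans on ones'-complement arithmetic:
csum_tcpudp_nofold() produces the unfolded pseudo-header sum, and a
CHECKSUM_HW packet is accepted when adding the hardware's unfolded
payload sum and folding the total yields zero. The stand-alone user-space
sketch below demonstrates that identity; csum_partial() and csum_fold()
here are simplified local stand-ins for the kernel helpers, not the
kernel code:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Accumulate 16-bit big-endian words into an unfolded 32-bit sum. */
    static uint32_t csum_partial(const uint8_t *buf, size_t len, uint32_t sum)
    {
            while (len > 1) {
                    sum += (uint32_t)buf[0] << 8 | buf[1];
                    buf += 2;
                    len -= 2;
            }
            if (len)                /* odd trailing byte, zero padded */
                    sum += (uint32_t)buf[0] << 8;
            return sum;
    }

    /* Fold to 16 bits and complement, as the kernel's csum_fold() does. */
    static uint16_t csum_fold(uint32_t sum)
    {
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    int main(void)
    {
            uint8_t data[] = { 0x45, 0x00, 0x00, 0x1c };
            uint16_t check = csum_fold(csum_partial(data, sizeof(data), 0));
            /* A receiver folds (data sum + received checksum): a valid
             * packet folds to zero, which is what the
             * "!csum_fold(csum_add(psum, skb->csum))" test above checks. */
            uint32_t total = csum_partial(data, sizeof(data), check);

            printf("checksum 0x%04x, verify %u (0 == valid)\n",
                   (unsigned)check, (unsigned)csum_fold(total));
            return 0;
    }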