vserver 1.9.5.x5
[linux-2.6.git] / net / core / netpoll.c
1 /*
2  * Common framework for low-level network console, dump, and debugger code
3  *
4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5  *
6  * based on the netconsole code from:
7  *
8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9  * Copyright (C) 2002  Red Hat, Inc.
10  */
11
12 #include <linux/smp_lock.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/string.h>
16 #include <linux/inetdevice.h>
17 #include <linux/inet.h>
18 #include <linux/interrupt.h>
19 #include <linux/netpoll.h>
20 #include <linux/sched.h>
21 #include <linux/rcupdate.h>
22 #include <net/tcp.h>
23 #include <net/udp.h>
24 #include <asm/unaligned.h>
25
26 /*
27  * We maintain a small pool of fully-sized skbs, to make sure the
28  * message gets out even in extreme OOM situations.
29  */
30
#define MAX_SKBS 32		/* depth of the emergency skb pool */
#define MAX_UDP_CHUNK 1460	/* max UDP payload per message (fits a 1500 MTU) */

/* Pool of preallocated full-size skbs so messages still get out
 * under extreme OOM; singly linked through skb->next. */
static DEFINE_SPINLOCK(skb_list_lock);
static int nr_skbs;		/* current pool depth, <= MAX_SKBS */
static struct sk_buff *skbs;	/* head of the pool list */

/* All netpoll instances that registered an rx_hook (see netpoll_setup) */
static DEFINE_SPINLOCK(rx_list_lock);
static LIST_HEAD(rx_list);

/* >0 while netpoll is consuming packets itself; netpoll_rx() then
 * swallows everything instead of handing it to the stack. */
static atomic_t trapped;
static DEFINE_SPINLOCK(netpoll_poll_lock);	/* serializes poll_napi() */

/* bit flags for dev->netpoll_rx */
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

/* largest frame netpoll_send_udp() can build: payload + UDP/IP/eth headers */
#define MAX_SKB_SIZE \
		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
				sizeof(struct iphdr) + sizeof(struct ethhdr))

static void zap_completion_queue(void);
52
53 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
54                              unsigned short ulen, u32 saddr, u32 daddr)
55 {
56         if (uh->check == 0)
57                 return 0;
58
59         if (skb->ip_summed == CHECKSUM_HW)
60                 return csum_tcpudp_magic(
61                         saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
62
63         skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
64
65         return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
66 }
67
68 /*
69  * Check whether delayed processing was scheduled for our current CPU,
70  * and then manually invoke NAPI polling to pump data off the card.
71  *
72  * In cases where there is bi-directional communications, reading only
73  * one message at a time can lead to packets being dropped by the
74  * network adapter, forcing superfluous retries and possibly timeouts.
75  * Thus, we set our budget to greater than 1.
76  */
static void poll_napi(struct netpoll *np)
{
	int budget = 16;	/* >1; see the rationale in the comment above */
	unsigned long flags;
	struct softnet_data *queue;

	/* One poller at a time, with local irqs off for the duration */
	spin_lock_irqsave(&netpoll_poll_lock, flags);
	queue = &__get_cpu_var(softnet_data);
	/* Only act if NAPI rx work was actually scheduled on this CPU */
	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
	    !list_empty(&queue->poll_list)) {
		/* Trap rx while we drive ->poll() ourselves, so incoming
		 * packets go to netpoll rather than the normal stack. */
		np->dev->netpoll_rx |= NETPOLL_RX_DROP;
		atomic_inc(&trapped);

		np->dev->poll(np->dev, &budget);

		atomic_dec(&trapped);
		np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
	}
	spin_unlock_irqrestore(&netpoll_poll_lock, flags);
}
97
98 void netpoll_poll(struct netpoll *np)
99 {
100         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
101                 return;
102
103         /* Process pending work on NIC */
104         np->dev->poll_controller(np->dev);
105         if (np->dev->poll)
106                 poll_napi(np);
107
108         zap_completion_queue();
109 }
110
111 static void refill_skbs(void)
112 {
113         struct sk_buff *skb;
114         unsigned long flags;
115
116         spin_lock_irqsave(&skb_list_lock, flags);
117         while (nr_skbs < MAX_SKBS) {
118                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
119                 if (!skb)
120                         break;
121
122                 skb->next = skbs;
123                 skbs = skb;
124                 nr_skbs++;
125         }
126         spin_unlock_irqrestore(&skb_list_lock, flags);
127 }
128
/*
 * Free skbs queued on this CPU's tx completion queue.  Normally the
 * NET_TX softirq does this, but netpoll may run with irqs disabled
 * where softirqs cannot fire, so reap them by hand to recover memory.
 */
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with irqs off, since irq context
		 * appends to completion_queue; free entries afterwards. */
		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			__kfree_skb(skb);
		}
	}

	put_cpu_var(softnet_data);
}
151
152 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
153 {
154         int once = 1, count = 0;
155         unsigned long flags;
156         struct sk_buff *skb = NULL;
157
158         zap_completion_queue();
159 repeat:
160         if (nr_skbs < MAX_SKBS)
161                 refill_skbs();
162
163         skb = alloc_skb(len, GFP_ATOMIC);
164
165         if (!skb) {
166                 spin_lock_irqsave(&skb_list_lock, flags);
167                 skb = skbs;
168                 if (skb)
169                         skbs = skb->next;
170                 skb->next = NULL;
171                 nr_skbs--;
172                 spin_unlock_irqrestore(&skb_list_lock, flags);
173         }
174
175         if(!skb) {
176                 count++;
177                 if (once && (count == 1000000)) {
178                         printk("out of netpoll skbs!\n");
179                         once = 0;
180                 }
181                 netpoll_poll(np);
182                 goto repeat;
183         }
184
185         atomic_set(&skb->users, 1);
186         skb_reserve(skb, reserve);
187         return skb;
188 }
189
/*
 * Transmit @skb directly via the driver's hard_start_xmit, bypassing
 * the qdisc layer.  If the device queue is stopped or the driver
 * reports busy, poll the device to make progress and retry without
 * bound.  Frees the skb if the device has gone away.
 */
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
	int status;

repeat:
	if(!np || !np->dev || !netif_running(np->dev)) {
		__kfree_skb(skb);
		return;
	}

	/* Take the device tx lock ourselves, recording ownership the
	 * same way dev_queue_xmit does. */
	spin_lock(&np->dev->xmit_lock);
	np->dev->xmit_lock_owner = smp_processor_id();

	/*
	 * network drivers do not expect to be called if the queue is
	 * stopped.
	 */
	if (netif_queue_stopped(np->dev)) {
		np->dev->xmit_lock_owner = -1;
		spin_unlock(&np->dev->xmit_lock);

		/* drop the lock before polling so the driver can run */
		netpoll_poll(np);
		goto repeat;
	}

	status = np->dev->hard_start_xmit(skb, np->dev);
	np->dev->xmit_lock_owner = -1;
	spin_unlock(&np->dev->xmit_lock);

	/* transmit busy */
	if(status) {
		netpoll_poll(np);
		goto repeat;
	}
}
225
/*
 * Send @len bytes of @msg as one UDP datagram using the addresses
 * and ports configured in @np.  Headers are built back-to-front with
 * skb_push(): payload is copied first, then UDP, IP and ethernet
 * headers are prepended.  The UDP checksum is left zero ("none").
 * NOTE(review): presumably @len must not exceed MAX_UDP_CHUNK so the
 * frame fits the pool skbs -- confirm at the callers.
 */
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, eth_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;

	udp_len = len + sizeof(*udph);
	ip_len = eth_len = udp_len + sizeof(*iph);
	total_len = eth_len + ETH_HLEN;

	/* reserve = total_len - len leaves exactly the header space
	 * in front of the payload for the pushes below */
	skb = find_skb(np, total_len, total_len - len);
	if (!skb)
		return;

	memcpy(skb->data, msg, len);
	skb->len += len;

	udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
	udph->check = 0;	/* no UDP checksum */

	iph = (struct iphdr *)skb_push(skb, sizeof(*iph));

	/* iph->version = 4; iph->ihl = 5; */
	put_unaligned(0x45, (unsigned char *)iph);
	iph->tos      = 0;
	put_unaligned(htons(ip_len), &(iph->tot_len));
	iph->id       = 0;
	iph->frag_off = 0;
	iph->ttl      = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check    = 0;	/* must be 0 while computing the csum */
	put_unaligned(htonl(np->local_ip), &(iph->saddr));
	put_unaligned(htonl(np->remote_ip), &(iph->daddr));
	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);

	eth->h_proto = htons(ETH_P_IP);
	memcpy(eth->h_source, np->local_mac, 6);
	memcpy(eth->h_dest, np->remote_mac, 6);

	netpoll_send_skb(np, skb);
}
274
/*
 * Answer an incoming ARP request for our local IP while the normal
 * stack is trapped.  Builds and transmits an ARPOP_REPLY carrying
 * this device's hardware address.  Silently ignores requests that
 * are not for us, malformed, or on a non-ARP interface.
 */
static void arp_reply(struct sk_buff *skb)
{
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
	u32 sip, tip;
	struct sk_buff *send_skb;
	unsigned long flags;
	struct list_head *p;
	struct netpoll *np = NULL;

	/* Find the netpoll instance bound to the arrival device */
	spin_lock_irqsave(&rx_list_lock, flags);
	list_for_each(p, &rx_list) {
		np = list_entry(p, struct netpoll, rx_list);
		if ( np->dev == skb->dev )
			break;
		np = NULL;
	}
	spin_unlock_irqrestore(&rx_list_lock, flags);

	if (!np) return;

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

	/* Need the full ARP header plus both (hw addr, IP) pairs in
	 * the linear area before we touch them */
	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
				 (2 * skb->dev->addr_len) +
				 (2 * sizeof(u32)))))
		return;

	skb->h.raw = skb->nh.raw = skb->data;
	arp = skb->nh.arph;

	/* Only answer Ethernet/IEEE802 ARP requests for IPv4 */
	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

	/* Extract sender IP (sip) and target IP (tip) from the body */
	arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
	memcpy(&sip, arp_ptr, 4);
	arp_ptr += 4 + skb->dev->addr_len;
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
		return;

	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
			    LL_RESERVED_SPACE(np->dev));

	if (!send_skb)
		return;

	send_skb->nh.raw = send_skb->data;
	arp = (struct arphdr *) skb_put(send_skb, size);
	send_skb->dev = skb->dev;
	send_skb->protocol = htons(ETH_P_ARP);

	/* Fill the device header for the ARP frame */

	if (np->dev->hard_header &&
	    np->dev->hard_header(send_skb, skb->dev, ptype,
				       np->remote_mac, np->local_mac,
				       send_skb->len) < 0) {
		kfree_skb(send_skb);
		return;
	}

	/*
	 * Fill out the arp protocol part.
	 *
	 * we only support ethernet device type,
	 * which (according to RFC 1390) should always equal 1 (Ethernet).
	 */

	arp->ar_hrd = htons(np->dev->type);
	arp->ar_pro = htons(ETH_P_IP);
	arp->ar_hln = np->dev->addr_len;
	arp->ar_pln = 4;
	arp->ar_op = htons(type);

	/* sender (hw, IP) = ours; target (hw, IP) = the requester's */
	arp_ptr=(unsigned char *)(arp + 1);
	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
	arp_ptr += np->dev->addr_len;
	memcpy(arp_ptr, &tip, 4);
	arp_ptr += 4;
	memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
	arp_ptr += np->dev->addr_len;
	memcpy(arp_ptr, &sip, 4);

	netpoll_send_skb(np, send_skb);
}
370
371 int netpoll_rx(struct sk_buff *skb)
372 {
373         int proto, len, ulen;
374         struct iphdr *iph;
375         struct udphdr *uh;
376         struct netpoll *np;
377         struct list_head *p;
378         unsigned long flags;
379
380         if (skb->dev->type != ARPHRD_ETHER)
381                 goto out;
382
383         /* check if netpoll clients need ARP */
384         if (skb->protocol == __constant_htons(ETH_P_ARP) &&
385             atomic_read(&trapped)) {
386                 arp_reply(skb);
387                 return 1;
388         }
389
390         proto = ntohs(eth_hdr(skb)->h_proto);
391         if (proto != ETH_P_IP)
392                 goto out;
393         if (skb->pkt_type == PACKET_OTHERHOST)
394                 goto out;
395         if (skb_shared(skb))
396                 goto out;
397
398         iph = (struct iphdr *)skb->data;
399         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
400                 goto out;
401         if (iph->ihl < 5 || iph->version != 4)
402                 goto out;
403         if (!pskb_may_pull(skb, iph->ihl*4))
404                 goto out;
405         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
406                 goto out;
407
408         len = ntohs(iph->tot_len);
409         if (skb->len < len || len < iph->ihl*4)
410                 goto out;
411
412         if (iph->protocol != IPPROTO_UDP)
413                 goto out;
414
415         len -= iph->ihl*4;
416         uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
417         ulen = ntohs(uh->len);
418
419         if (ulen != len)
420                 goto out;
421         if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
422                 goto out;
423
424         spin_lock_irqsave(&rx_list_lock, flags);
425         list_for_each(p, &rx_list) {
426                 np = list_entry(p, struct netpoll, rx_list);
427                 if (np->dev && np->dev != skb->dev)
428                         continue;
429                 if (np->local_ip && np->local_ip != ntohl(iph->daddr))
430                         continue;
431                 if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
432                         continue;
433                 if (np->local_port && np->local_port != ntohs(uh->dest))
434                         continue;
435
436                 spin_unlock_irqrestore(&rx_list_lock, flags);
437
438                 if (np->rx_hook)
439                         np->rx_hook(np, ntohs(uh->source),
440                                     (char *)(uh+1),
441                                     ulen - sizeof(struct udphdr));
442
443                 return 1;
444         }
445         spin_unlock_irqrestore(&rx_list_lock, flags);
446
447 out:
448         return atomic_read(&trapped);
449 }
450
/*
 * Parse a netpoll config string of the form
 *   [local_port]@[local_ip]/[device],[remote_port]@<remote_ip>/[remote_mac]
 * into @np.  Omitted optional fields keep whatever value @np already
 * holds.  The string is modified in place (delimiters are overwritten
 * with NULs).  Returns 0 on success, -1 on a malformed string; the
 * position that failed to parse is logged.
 */
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;

	/* optional local port, terminated by '@' */
	if(*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim=0;
		np->local_port=simple_strtol(cur, NULL, 10);
		cur=delim;
	}
	cur++;
	printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);

	/* optional local IP, terminated by '/' */
	if(*cur != '/') {
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim=0;
		np->local_ip=ntohl(in_aton(cur));
		cur=delim;

		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
		       np->name, HIPQUAD(np->local_ip));
	}
	cur++;

	if ( *cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim=0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur=delim;
	}
	cur++;

	printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);

	if ( *cur != '@' ) {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_port=simple_strtol(cur, NULL, 10);
		cur=delim;
	}
	cur++;
	printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);

	/* dst ip -- mandatory, terminated by '/' */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim=0;
	np->remote_ip=ntohl(in_aton(cur));
	cur=delim+1;

	printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
		       np->name, HIPQUAD(np->remote_ip));

	if( *cur != 0 )
	{
		/* MAC address: six hex octets separated by ':' */
		if ((delim = strchr(cur, ':')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_mac[0]=simple_strtol(cur, NULL, 16);
		cur=delim+1;
		if ((delim = strchr(cur, ':')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_mac[1]=simple_strtol(cur, NULL, 16);
		cur=delim+1;
		if ((delim = strchr(cur, ':')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_mac[2]=simple_strtol(cur, NULL, 16);
		cur=delim+1;
		if ((delim = strchr(cur, ':')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_mac[3]=simple_strtol(cur, NULL, 16);
		cur=delim+1;
		if ((delim = strchr(cur, ':')) == NULL)
			goto parse_failed;
		*delim=0;
		np->remote_mac[4]=simple_strtol(cur, NULL, 16);
		cur=delim+1;
		np->remote_mac[5]=simple_strtol(cur, NULL, 16);
	}

	printk(KERN_INFO "%s: remote ethernet address "
	       "%02x:%02x:%02x:%02x:%02x:%02x\n",
	       np->name,
	       np->remote_mac[0],
	       np->remote_mac[1],
	       np->remote_mac[2],
	       np->remote_mac[3],
	       np->remote_mac[4],
	       np->remote_mac[5]);

	return 0;

 parse_failed:
	printk(KERN_INFO "%s: couldn't parse config at %s!\n",
	       np->name, cur);
	return -1;
}
558
/*
 * Resolve and attach @np to its named device: bring the interface up
 * if needed, wait for carrier, fill in any unset local MAC/IP from
 * the device, and register the rx hook.  Takes a reference on the
 * device (dropped on the error path here or in netpoll_cleanup()).
 * Returns 0 on success, -1 on failure.
 */
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;

	/* NOTE(review): if dev_name is an embedded array this check is
	 * always true -- confirm against struct netpoll's declaration */
	if (np->dev_name)
		ndev = dev_get_by_name(np->dev_name);
	if (!ndev) {
		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
		       np->name, np->dev_name);
		return -1;
	}
	if (!ndev->poll_controller) {
		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
		       np->name, np->dev_name);
		goto release;
	}

	if (!netif_running(ndev)) {
		unsigned short oflags;
		unsigned long atmost, atleast;

		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
		       np->name, np->dev_name);

		oflags = ndev->flags;

		/* dev_change_flags requires the rtnl semaphore */
		rtnl_shlock();
		if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
			printk(KERN_ERR "%s: failed to open %s\n",
			       np->name, np->dev_name);
			rtnl_shunlock();
			goto release;
		}
		rtnl_shunlock();

		/* Wait up to 10s for link; proceed anyway on timeout */
		atleast = jiffies + HZ/10;
		atmost = jiffies + 10*HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				printk(KERN_NOTICE
				       "%s: timeout waiting for carrier\n",
				       np->name);
				break;
			}
			cond_resched();
		}

		/* Carrier came up suspiciously fast (<100ms after open):
		 * some NICs report link before it is really stable, so
		 * wait out the full window. */
		if (time_before(jiffies, atleast)) {
			printk(KERN_NOTICE "%s: carrier detect appears flaky,"
			       " waiting 10 seconds\n",
			       np->name);
			while (time_before(jiffies, atmost))
				cond_resched();
		}
	}

	/* Default the local MAC from the device if none was configured */
	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
		memcpy(np->local_mac, ndev->dev_addr, 6);

	/* Default the local IP from the device's first inet address */
	if (!np->local_ip) {
		rcu_read_lock();
		in_dev = __in_dev_get(ndev);

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
			       np->name, np->dev_name);
			goto release;
		}

		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
		rcu_read_unlock();
		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
		       np->name, HIPQUAD(np->local_ip));
	}

	np->dev = ndev;

	/* Register for rx trapping if the client wants packets */
	if(np->rx_hook) {
		unsigned long flags;

		np->dev->netpoll_rx = NETPOLL_RX_ENABLED;

		spin_lock_irqsave(&rx_list_lock, flags);
		list_add(&np->rx_list, &rx_list);
		spin_unlock_irqrestore(&rx_list_lock, flags);
	}

	return 0;
 release:
	dev_put(ndev);
	return -1;
}
653
654 void netpoll_cleanup(struct netpoll *np)
655 {
656         if (np->rx_hook) {
657                 unsigned long flags;
658
659                 spin_lock_irqsave(&rx_list_lock, flags);
660                 list_del(&np->rx_list);
661                 spin_unlock_irqrestore(&rx_list_lock, flags);
662         }
663
664         if (np->dev)
665                 np->dev->netpoll_rx = 0;
666         dev_put(np->dev);
667         np->dev = NULL;
668 }
669
670 int netpoll_trap(void)
671 {
672         return atomic_read(&trapped);
673 }
674
675 void netpoll_set_trap(int trap)
676 {
677         if (trap)
678                 atomic_inc(&trapped);
679         else
680                 atomic_dec(&trapped);
681 }
682
/* Public netpoll API, used by netconsole and similar clients */
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);