upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / net / core / netpoll.c
1 /*
2  * Common framework for low-level network console, dump, and debugger code
3  *
4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5  *
6  * based on the netconsole code from:
7  *
8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9  * Copyright (C) 2002  Red Hat, Inc.
10  */
11
12 #include <linux/smp_lock.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/string.h>
16 #include <linux/inetdevice.h>
17 #include <linux/inet.h>
18 #include <linux/interrupt.h>
19 #include <linux/netpoll.h>
20 #include <linux/sched.h>
21 #include <linux/rcupdate.h>
22 #include <linux/nmi.h>
23 #include <net/tcp.h>
24 #include <net/udp.h>
25 #include <asm/unaligned.h>
26 #include <asm/byteorder.h>
27
/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_SKBS 32
#define MAX_UDP_CHUNK 1460

/* Emergency skb pool: singly-linked via skb->next, guarded by skb_list_lock. */
static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
static int nr_skbs;		/* current pool depth, bounded by MAX_SKBS */
static struct sk_buff *skbs;	/* head of the pool list */

/* Netpoll clients registered for receive (those with an rx_hook). */
static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(rx_list);

/* Non-zero while netpoll is consuming rx traffic itself (see netpoll_rx()). */
static atomic_t trapped;
/* Serializes manual NAPI polling in poll_napi(); global, not per-device. */
spinlock_t netpoll_poll_lock = SPIN_LOCK_UNLOCKED;

/* Bits in dev->netpoll_rx. */
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

/* Worst-case frame we build: max UDP payload + UDP, IP, ethernet headers. */
#define MAX_SKB_SIZE \
		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
				sizeof(struct iphdr) + sizeof(struct ethhdr))

static void zap_completion_queue(void);
/*
 * Validate the UDP checksum of an incoming packet.
 *
 * Returns 0 when the packet is acceptable (checksum valid, or absent:
 * uh->check == 0 means "no checksum" for UDP); a non-zero folded
 * residue indicates corruption.
 */
static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			     unsigned short ulen, u32 saddr, u32 daddr)
{
	/* Sender did not supply a checksum: accept as-is. */
	if (uh->check == 0)
		return 0;

	/* Hardware summed the payload; fold in the pseudo-header only. */
	if (skb->ip_summed == CHECKSUM_HW)
		return csum_tcpudp_magic(
			saddr, daddr, ulen, IPPROTO_UDP, skb->csum);

	/* Software path: seed with the pseudo-header, then walk the data. */
	skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
}
69
/*
 * Check whether delayed processing was scheduled for our current CPU,
 * and then manually invoke NAPI polling to pump data off the card.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static void poll_napi(struct netpoll *np)
{
	/* Larger budget in netdump mode: drain the rx ring aggressively. */
	int budget = netdump_mode ? 64 : 16;
	unsigned long flags;
	struct softnet_data *queue;

	/* Global lock: only one context drives ->poll() by hand at a time. */
	spin_lock_irqsave(&netpoll_poll_lock, flags);
	queue = &__get_cpu_var(softnet_data);
	/* Only call ->poll() when the device is actually scheduled for
	 * NAPI rx on this CPU, otherwise we would race the softirq. */
	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
	    !list_empty(&queue->poll_list)) {
		/* While polling, have netpoll_rx() swallow the packets
		 * instead of feeding them to the (trapped) stack. */
		np->dev->netpoll_rx |= NETPOLL_RX_DROP;
		atomic_inc(&trapped);

		np->dev->poll(np->dev, &budget);

		atomic_dec(&trapped);
		np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
	}
	spin_unlock_irqrestore(&netpoll_poll_lock, flags);
}
99
100 void netpoll_poll(struct netpoll *np)
101 {
102         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
103                 return;
104
105         /* Process pending work on NIC */
106         np->dev->poll_controller(np->dev);
107         if (np->dev->poll)
108                 poll_napi(np);
109
110         zap_completion_queue();
111 }
112
113 static void refill_skbs(void)
114 {
115         struct sk_buff *skb;
116         unsigned long flags;
117
118         spin_lock_irqsave(&skb_list_lock, flags);
119         while (nr_skbs < MAX_SKBS) {
120                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
121                 if (!skb)
122                         break;
123
124                 skb->next = skbs;
125                 skbs = skb;
126                 nr_skbs++;
127         }
128         spin_unlock_irqrestore(&skb_list_lock, flags);
129 }
130
/*
 * Free any skbs sitting on this CPU's softnet completion queue.
 *
 * Normally NET_TX_SOFTIRQ reaps these, but softirqs may never get to
 * run while netpoll is spinning in interrupt/crash context, so do it
 * by hand to avoid exhausting memory.
 */
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with local interrupts off -- the
		 * queue is appended to from irq context elsewhere. */
		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		/* Free the detached list with interrupts enabled again. */
		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			__kfree_skb(skb);
		}
	}

	put_cpu_var(softnet_data);
	/* Called from tight polling loops; keep the NMI watchdog quiet. */
	touch_nmi_watchdog();
}
154
155 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
156 {
157         int once = 1, count = 0;
158         unsigned long flags;
159         struct sk_buff *skb = NULL;
160
161         zap_completion_queue();
162 repeat:
163         if (nr_skbs < MAX_SKBS)
164                 refill_skbs();
165
166         skb = alloc_skb(len, GFP_ATOMIC);
167
168         if (!skb) {
169                 spin_lock_irqsave(&skb_list_lock, flags);
170                 skb = skbs;
171                 if (skb)
172                         skbs = skb->next;
173                 skb->next = NULL;
174                 nr_skbs--;
175                 spin_unlock_irqrestore(&skb_list_lock, flags);
176         }
177
178         if(!skb) {
179                 count++;
180                 if (once && (count == 1000000)) {
181                         printk("out of netpoll skbs!\n");
182                         once = 0;
183                 }
184                 netpoll_poll(np);
185                 goto repeat;
186         }
187
188         atomic_set(&skb->users, 1);
189         skb_reserve(skb, reserve);
190         return skb;
191 }
192
/*
 * Transmit a fully-built skb directly through the driver, bypassing
 * the qdisc/dev_queue_xmit path (which we cannot use from this
 * context).  Retries forever -- polling the device between attempts to
 * reap tx completions -- and only gives up (freeing the skb) when the
 * device disappears or stops running.
 */
void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
	int status;

repeat:
	if(!np || !np->dev || !netif_running(np->dev)) {
		__kfree_skb(skb);
		return;
	}

	/* Take the xmit lock by hand, recording ownership the same way
	 * dev_queue_xmit() does. */
	spin_lock(&np->dev->xmit_lock);
	np->dev->xmit_lock_owner = smp_processor_id();

	/*
	 * network drivers do not expect to be called if the queue is
	 * stopped.
	 */
	if (netif_queue_stopped(np->dev)) {
		np->dev->xmit_lock_owner = -1;
		spin_unlock(&np->dev->xmit_lock);

		/* Drop the lock before polling, then try again. */
		netpoll_poll(np);
		goto repeat;
	}

	status = np->dev->hard_start_xmit(skb, np->dev);
	np->dev->xmit_lock_owner = -1;
	spin_unlock(&np->dev->xmit_lock);

	/* transmit busy */
	if(status) {
		netpoll_poll(np);
		goto repeat;
	}
}
228
229 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
230 {
231         int total_len, eth_len, ip_len, udp_len;
232         struct sk_buff *skb;
233         struct udphdr *udph;
234         struct iphdr *iph;
235         struct ethhdr *eth;
236
237         udp_len = len + sizeof(*udph);
238         ip_len = eth_len = udp_len + sizeof(*iph);
239         total_len = eth_len + ETH_HLEN;
240
241         skb = find_skb(np, total_len, total_len - len);
242         if (!skb)
243                 return;
244
245         memcpy(skb->data, msg, len);
246         skb->len += len;
247
248         udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
249         udph->source = htons(np->local_port);
250         udph->dest = htons(np->remote_port);
251         udph->len = htons(udp_len);
252         udph->check = 0;
253
254         iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
255
256         /* iph->version = 4; iph->ihl = 5; */
257         put_unaligned(0x45, (unsigned char *)iph);
258         iph->tos      = 0;
259         put_unaligned(htons(ip_len), &(iph->tot_len));
260         iph->id       = 0;
261         iph->frag_off = 0;
262         iph->ttl      = 64;
263         iph->protocol = IPPROTO_UDP;
264         iph->check    = 0;
265         put_unaligned(htonl(np->local_ip), &(iph->saddr));
266         put_unaligned(htonl(np->remote_ip), &(iph->daddr));
267         iph->check    = ip_fast_csum((unsigned char *)iph, 5);
268
269         eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
270
271         eth->h_proto = htons(ETH_P_IP);
272         memcpy(eth->h_source, np->local_mac, 6);
273         memcpy(eth->h_dest, np->remote_mac, 6);
274
275         netpoll_send_skb(np, skb);
276 }
277
/*
 * Answer an ARP request for a netpoll client's IP while the stack is
 * trapped: validate the request, then build and transmit an ARP reply
 * from scratch via netpoll_send_skb().
 */
static void arp_reply(struct sk_buff *skb)
{
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
	u32 sip, tip;
	struct sk_buff *send_skb;
	unsigned long flags;
	struct list_head *p;
	struct netpoll *np = NULL;

	/* Find the netpoll client bound to the ingress device. */
	spin_lock_irqsave(&rx_list_lock, flags);
	list_for_each(p, &rx_list) {
		np = list_entry(p, struct netpoll, rx_list);
		if ( np->dev == skb->dev )
			break;
		np = NULL;
	}
	spin_unlock_irqrestore(&rx_list_lock, flags);

	if (!np) return;

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

	/* Header plus both (hw addr, proto addr) pairs must be linear. */
	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
				 (2 * skb->dev->addr_len) +
				 (2 * sizeof(u32)))))
		return;

	skb->h.raw = skb->nh.raw = skb->data;
	arp = skb->nh.arph;

	/* Only answer Ethernet/802 ARP *requests* for IP. */
	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

	/* Extract sender IP (sip) and target IP (tip); the payload layout
	 * is: sender hw, sender ip, target hw, target ip. */
	arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
	memcpy(&sip, arp_ptr, 4);
	arp_ptr += 4 + skb->dev->addr_len;
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
		return;

	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
			    LL_RESERVED_SPACE(np->dev));

	if (!send_skb)
		return;

	send_skb->nh.raw = send_skb->data;
	arp = (struct arphdr *) skb_put(send_skb, size);
	send_skb->dev = skb->dev;
	send_skb->protocol = htons(ETH_P_ARP);

	/* Fill the device header for the ARP frame */

	if (np->dev->hard_header &&
	    np->dev->hard_header(send_skb, skb->dev, ptype,
				       np->remote_mac, np->local_mac,
				       send_skb->len) < 0) {
		kfree_skb(send_skb);
		return;
	}

	/*
	 * Fill out the arp protocol part.
	 *
	 * we only support ethernet device type,
	 * which (according to RFC 1390) should always equal 1 (Ethernet).
	 */

	arp->ar_hrd = htons(np->dev->type);
	arp->ar_pro = htons(ETH_P_IP);
	arp->ar_hln = np->dev->addr_len;
	arp->ar_pln = 4;
	arp->ar_op = htons(type);

	/* Reply payload: our hw addr + requested IP, then the requester's
	 * hw addr + IP. */
	arp_ptr=(unsigned char *)(arp + 1);
	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
	arp_ptr += np->dev->addr_len;
	memcpy(arp_ptr, &tip, 4);
	arp_ptr += 4;
	memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
	arp_ptr += np->dev->addr_len;
	memcpy(arp_ptr, &sip, 4);

	netpoll_send_skb(np, send_skb);
}
373
/*
 * Inspect an incoming packet on behalf of netpoll.  Validates the
 * ethernet/IP/UDP layering, and hands matching UDP datagrams to the
 * registered client's rx_hook.
 *
 * NOTE(review): the return-value semantics depend on how the rx path
 * in this tree calls us -- 1 is returned both for "netpoll rx not
 * enabled on this device" and after consuming a packet, while the
 * fall-through returns the trapped count.  Confirm against the caller.
 */
int netpoll_rx(struct sk_buff *skb)
{
	int proto, len, ulen;
	struct iphdr *iph;
	struct udphdr *uh;
	struct netpoll *np;
	struct list_head *p;
	unsigned long flags;

	if (!(skb->dev->netpoll_rx & NETPOLL_RX_ENABLED))
		return 1;

	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

	/* check if netpoll clients need ARP */
	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
	    atomic_read(&trapped)) {
		arp_reply(skb);
		return 1;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	/* Validate the IP header: pullable, v4, sane ihl, good checksum. */
	iph = (struct iphdr *)skb->data;
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
	if (iph->ihl < 5 || iph->version != 4)
		goto out;
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto out;
	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
		goto out;

	len = ntohs(iph->tot_len);
	if (skb->len < len || len < iph->ihl*4)
		goto out;

	if (iph->protocol != IPPROTO_UDP)
		goto out;

	/* UDP length must account for exactly the remaining IP payload. */
	len -= iph->ihl*4;
	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
	ulen = ntohs(uh->len);

	if (ulen != len)
		goto out;
	/* NOTE(review): checksum_udp() folds to a non-negative value on
	 * the software path, so the < 0 test looks suspect -- verify. */
	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
		goto out;

	/* Match against registered clients; zero fields act as wildcards. */
	spin_lock_irqsave(&rx_list_lock, flags);
	list_for_each(p, &rx_list) {
		np = list_entry(p, struct netpoll, rx_list);
		if (np->dev && np->dev != skb->dev)
			continue;
		if (np->local_ip && np->local_ip != ntohl(iph->daddr))
			continue;
		if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
			continue;
		if (np->local_port && np->local_port != ntohs(uh->dest))
			continue;

		/* Drop the lock before calling out to the client hook. */
		spin_unlock_irqrestore(&rx_list_lock, flags);

		if (np->rx_hook)
			np->rx_hook(np, ntohs(uh->source),
				    (char *)(uh+1),
				    ulen - sizeof(struct udphdr));

		return 1;
	}
	spin_unlock_irqrestore(&rx_list_lock, flags);

out:
	return atomic_read(&trapped);
}
456
457 int netpoll_parse_options(struct netpoll *np, char *opt)
458 {
459         char *cur=opt, *delim;
460
461         if(*cur != '@') {
462                 if ((delim = strchr(cur, '@')) == NULL)
463                         goto parse_failed;
464                 *delim=0;
465                 np->local_port=simple_strtol(cur, NULL, 10);
466                 cur=delim;
467         }
468         cur++;
469         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
470
471         if(*cur != '/') {
472                 if ((delim = strchr(cur, '/')) == NULL)
473                         goto parse_failed;
474                 *delim=0;
475                 np->local_ip=ntohl(in_aton(cur));
476                 cur=delim;
477
478                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
479                        np->name, HIPQUAD(np->local_ip));
480         }
481         cur++;
482
483         if ( *cur != ',') {
484                 /* parse out dev name */
485                 if ((delim = strchr(cur, ',')) == NULL)
486                         goto parse_failed;
487                 *delim=0;
488                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
489                 cur=delim;
490         }
491         cur++;
492
493         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
494
495         if ( *cur != '@' ) {
496                 /* dst port */
497                 if ((delim = strchr(cur, '@')) == NULL)
498                         goto parse_failed;
499                 *delim=0;
500                 np->remote_port=simple_strtol(cur, NULL, 10);
501                 cur=delim;
502         }
503         cur++;
504         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
505
506         /* dst ip */
507         if ((delim = strchr(cur, '/')) == NULL)
508                 goto parse_failed;
509         *delim=0;
510         np->remote_ip=ntohl(in_aton(cur));
511         cur=delim+1;
512
513         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
514                        np->name, HIPQUAD(np->remote_ip));
515
516         if( *cur != 0 )
517         {
518                 /* MAC address */
519                 if ((delim = strchr(cur, ':')) == NULL)
520                         goto parse_failed;
521                 *delim=0;
522                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
523                 cur=delim+1;
524                 if ((delim = strchr(cur, ':')) == NULL)
525                         goto parse_failed;
526                 *delim=0;
527                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
528                 cur=delim+1;
529                 if ((delim = strchr(cur, ':')) == NULL)
530                         goto parse_failed;
531                 *delim=0;
532                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
533                 cur=delim+1;
534                 if ((delim = strchr(cur, ':')) == NULL)
535                         goto parse_failed;
536                 *delim=0;
537                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
538                 cur=delim+1;
539                 if ((delim = strchr(cur, ':')) == NULL)
540                         goto parse_failed;
541                 *delim=0;
542                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
543                 cur=delim+1;
544                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
545         }
546
547         printk(KERN_INFO "%s: remote ethernet address "
548                "%02x:%02x:%02x:%02x:%02x:%02x\n",
549                np->name,
550                np->remote_mac[0],
551                np->remote_mac[1],
552                np->remote_mac[2],
553                np->remote_mac[3],
554                np->remote_mac[4],
555                np->remote_mac[5]);
556
557         return 0;
558
559  parse_failed:
560         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
561                np->name, cur);
562         return -1;
563 }
564
/*
 * Attach @np to its named device: look up np->dev_name, bring the
 * interface up if necessary (waiting for carrier), default any unset
 * local MAC/IP from the device, and register the client for receive.
 *
 * Returns 0 on success (holding a reference on the device until
 * netpoll_cleanup()), -1 on failure (reference dropped).
 */
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;

	if (np->dev_name)
		ndev = dev_get_by_name(np->dev_name);	/* takes a ref */
	if (!ndev) {
		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
		       np->name, np->dev_name);
		return -1;
	}
	if (!ndev->poll_controller) {
		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
		       np->name, np->dev_name);
		goto release;
	}

	if (!netif_running(ndev)) {
		unsigned short oflags;
		unsigned long atmost, atleast;

		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
		       np->name, np->dev_name);

		oflags = ndev->flags;

		/* dev_change_flags() needs the rtnl semaphore held. */
		rtnl_shlock();
		if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
			printk(KERN_ERR "%s: failed to open %s\n",
			       np->name, np->dev_name);
			rtnl_shunlock();
			goto release;
		}
		rtnl_shunlock();

		/* Wait up to 10s for carrier; insist on at least HZ/10 so
		 * a bouncing link isn't mistaken for a stable one. */
		atleast = jiffies + HZ/10;
		atmost = jiffies + 10*HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				printk(KERN_NOTICE
				       "%s: timeout waiting for carrier\n",
				       np->name);
				break;
			}
			cond_resched();
		}

		if (time_before(jiffies, atleast)) {
			printk(KERN_NOTICE "%s: carrier detect appears flaky,"
			       " waiting 10 seconds\n",
			       np->name);
			while (time_before(jiffies, atmost))
				cond_resched();
		}
	}

	/* Default the local MAC from the device if the caller left it 0. */
	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
		memcpy(np->local_mac, ndev->dev_addr, 6);

	/* Default the local IP from the device's first inet address. */
	if (!np->local_ip) {
		rcu_read_lock();
		in_dev = __in_dev_get(ndev);

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
			       np->name, np->dev_name);
			goto release;
		}

		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
		rcu_read_unlock();
		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
		       np->name, HIPQUAD(np->local_ip));
	}

	np->dev = ndev;

	/* Register for receive if the client supplied a hook. */
	if(np->rx_hook) {
		unsigned long flags;

		np->dev->netpoll_rx = NETPOLL_RX_ENABLED;

		spin_lock_irqsave(&rx_list_lock, flags);
		list_add(&np->rx_list, &rx_list);
		spin_unlock_irqrestore(&rx_list_lock, flags);
	}

	/* Last client to register a dump_func wins. */
	if(np->dump_func)
		netdump_func = np->dump_func;

	return 0;
 release:
	dev_put(ndev);
	return -1;
}
662
663 void netpoll_cleanup(struct netpoll *np)
664 {
665         if (np->rx_hook) {
666                 unsigned long flags;
667
668                 spin_lock_irqsave(&rx_list_lock, flags);
669                 list_del(&np->rx_list);
670                 spin_unlock_irqrestore(&rx_list_lock, flags);
671         }
672
673         if (np->dev)
674                 np->dev->netpoll_rx = 0;
675         dev_put(np->dev);
676         np->dev = NULL;
677 }
678
/* Non-zero while netpoll has trapped the receive path for itself. */
int netpoll_trap(void)
{
	return atomic_read(&trapped);
}
683
684 void netpoll_set_trap(int trap)
685 {
686         if (trap)
687                 atomic_inc(&trapped);
688         else
689                 atomic_dec(&trapped);
690 }
691
/*
 * Force-reinitialize the locks netpoll depends on.  Presumably used by
 * crash/dump paths where a lock may have been left held by the dying
 * context -- TODO confirm against the callers of this GPL export.
 */
void netpoll_reset_locks(struct netpoll *np)
{
	spin_lock_init(&rx_list_lock);
	spin_lock_init(&skb_list_lock);
	spin_lock_init(&np->dev->xmit_lock);
}
698
/* Public netpoll entry points. */
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);
EXPORT_SYMBOL_GPL(netpoll_reset_locks);