Fedora Core 2 - 1.492
[linux-2.6.git] / net / core / netpoll.c
1 /*
2  * Common framework for low-level network console, dump, and debugger code
3  *
4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5  *
6  * based on the netconsole code from:
7  *
8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9  * Copyright (C) 2002  Red Hat, Inc.
10  */
11
12 #include <linux/smp_lock.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/string.h>
16 #include <linux/inetdevice.h>
17 #include <linux/inet.h>
18 #include <linux/interrupt.h>
19 #include <linux/netpoll.h>
20 #include <linux/sched.h>
21 #include <linux/nmi.h>
22 #include <net/tcp.h>
23 #include <net/udp.h>
24
25 /*
26  * We maintain a small pool of fully-sized skbs, to make sure the
27  * message gets out even in extreme OOM situations.
28  */
29
30 #define MAX_SKBS 32
31 #define MAX_UDP_CHUNK 1460
32
33 #define NETPOLL_RX_ENABLED  1
34 #define NETPOLL_RX_DROP     2
35
36 static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
37 static int nr_skbs;
38 static struct sk_buff *skbs;
39
40 static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
41 static LIST_HEAD(rx_list);
42
43 static int trapped;
44
45 extern void (*netdump_func) (struct pt_regs *regs);
46
47 #define MAX_SKB_SIZE \
48                 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
49                                 sizeof(struct iphdr) + sizeof(struct ethhdr))
50
51 static void zap_completion_queue(void);
52
53 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
54                              unsigned short ulen, u32 saddr, u32 daddr)
55 {
56         if (uh->check == 0)
57                 return 0;
58
59         if (skb->ip_summed == CHECKSUM_HW)
60                 return csum_tcpudp_magic(
61                         saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
62
63         skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
64
65         return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
66 }
67
68 void netpoll_poll(struct netpoll *np)
69 {
70         int budget = netdump_mode ? 64 : 16;
71
72         if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
73                 return;
74
75         /* Process pending work on NIC */
76         np->dev->poll_controller(np->dev);
77
78         /* If scheduling is stopped, tickle NAPI bits */
79         if (np->dev->poll && 
80             test_bit(__LINK_STATE_RX_SCHED, &np->dev->state)) {
81                 np->dev->netpoll_rx |= NETPOLL_RX_DROP;
82                 if (trapped) {
83                         np->dev->poll(np->dev, &budget);
84                 } else {
85                         trapped = 1;
86                         np->dev->poll(np->dev, &budget);
87                         trapped = 0;
88                 }
89                 np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
90         }
91
92         zap_completion_queue();
93 }
94
95 static void refill_skbs(void)
96 {
97         struct sk_buff *skb;
98         unsigned long flags;
99
100         spin_lock_irqsave(&skb_list_lock, flags);
101         while (nr_skbs < MAX_SKBS) {
102                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
103                 if (!skb)
104                         break;
105
106                 skb->next = skbs;
107                 skbs = skb;
108                 nr_skbs++;
109         }
110         spin_unlock_irqrestore(&skb_list_lock, flags);
111 }
112
113 static void zap_completion_queue(void)
114 {
115         unsigned long flags;
116         struct softnet_data *sd = &get_cpu_var(softnet_data);
117
118         if (sd->completion_queue) {
119                 struct sk_buff *clist;
120
121                 local_irq_save(flags);
122                 clist = sd->completion_queue;
123                 sd->completion_queue = NULL;
124                 local_irq_restore(flags);
125
126                 while (clist != NULL) {
127                         struct sk_buff *skb = clist;
128                         clist = clist->next;
129                         __kfree_skb(skb);
130                 }
131         }
132
133         put_cpu_var(softnet_data);
134         touch_nmi_watchdog();
135 }
136
137 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
138 {
139         int once = 1, count = 0;
140         unsigned long flags;
141         struct sk_buff *skb = NULL;
142
143         zap_completion_queue();
144 repeat:
145         if (nr_skbs < MAX_SKBS)
146                 refill_skbs();
147
148         skb = alloc_skb(len, GFP_ATOMIC);
149
150         if (!skb) {
151                 spin_lock_irqsave(&skb_list_lock, flags);
152                 skb = skbs;
153                 if (skb)
154                         skbs = skb->next;
155                 skb->next = NULL;
156                 nr_skbs--;
157                 spin_unlock_irqrestore(&skb_list_lock, flags);
158         }
159
160         if(!skb) {
161                 count++;
162                 if (once && (count == 1000000)) {
163                         printk("out of netpoll skbs!\n");
164                         once = 0;
165                 }
166                 netpoll_poll(np);
167                 goto repeat;
168         }
169
170         atomic_set(&skb->users, 1);
171         skb_reserve(skb, reserve);
172         return skb;
173 }
174
175 void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
176 {
177         int status;
178
179 repeat:
180         if(!np || !np->dev || !netif_running(np->dev)) {
181                 __kfree_skb(skb);
182                 return;
183         }
184
185         spin_lock(&np->dev->xmit_lock);
186         np->dev->xmit_lock_owner = smp_processor_id();
187
188         status = np->dev->hard_start_xmit(skb, np->dev);
189         np->dev->xmit_lock_owner = -1;
190         spin_unlock(&np->dev->xmit_lock);
191
192         /* transmit busy */
193         if(status) {
194                 netpoll_poll(np);
195                 goto repeat;
196         }
197 }
198
199 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
200 {
201         int total_len, eth_len, ip_len, udp_len;
202         struct sk_buff *skb;
203         struct udphdr *udph;
204         struct iphdr *iph;
205         struct ethhdr *eth;
206
207         udp_len = len + sizeof(*udph);
208         ip_len = eth_len = udp_len + sizeof(*iph);
209         total_len = eth_len + ETH_HLEN;
210
211         skb = find_skb(np, total_len, total_len - len);
212         if (!skb)
213                 return;
214
215         memcpy(skb->data, msg, len);
216         skb->len += len;
217
218         udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
219         udph->source = htons(np->local_port);
220         udph->dest = htons(np->remote_port);
221         udph->len = htons(udp_len);
222         udph->check = 0;
223
224         iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
225
226         iph->version  = 4;
227         iph->ihl      = 5;
228         iph->tos      = 0;
229         iph->tot_len  = htons(ip_len);
230         iph->id       = 0;
231         iph->frag_off = 0;
232         iph->ttl      = 64;
233         iph->protocol = IPPROTO_UDP;
234         iph->check    = 0;
235         iph->saddr    = htonl(np->local_ip);
236         iph->daddr    = htonl(np->remote_ip);
237         iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
238
239         eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
240
241         eth->h_proto = htons(ETH_P_IP);
242         memcpy(eth->h_source, np->local_mac, 6);
243         memcpy(eth->h_dest, np->remote_mac, 6);
244
245         netpoll_send_skb(np, skb);
246 }
247
248 static void arp_reply(struct sk_buff *skb)
249 {
250         struct arphdr *arp;
251         unsigned char *arp_ptr;
252         int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
253         u32 sip, tip;
254         struct sk_buff *send_skb;
255         unsigned long flags;
256         struct list_head *p;
257         struct netpoll *np = NULL;
258
259         spin_lock_irqsave(&rx_list_lock, flags);
260         list_for_each(p, &rx_list) {
261                 np = list_entry(p, struct netpoll, rx_list);
262                 if ( np->dev == skb->dev )
263                         break;
264                 np = NULL;
265         }
266         spin_unlock_irqrestore(&rx_list_lock, flags);
267
268         if (!np) return;
269
270         /* No arp on this interface */
271         if (skb->dev->flags & IFF_NOARP)
272                 return;
273
274         if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
275                                  (2 * skb->dev->addr_len) +
276                                  (2 * sizeof(u32)))))
277                 return;
278
279         skb->h.raw = skb->nh.raw = skb->data;
280         arp = skb->nh.arph;
281
282         if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
283              arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
284             arp->ar_pro != htons(ETH_P_IP) ||
285             arp->ar_op != htons(ARPOP_REQUEST))
286                 return;
287
288         arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
289         memcpy(&sip, arp_ptr, 4);
290         arp_ptr += 4 + skb->dev->addr_len;
291         memcpy(&tip, arp_ptr, 4);
292
293         /* Should we ignore arp? */
294         if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
295                 return;
296
297         size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
298         send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
299                             LL_RESERVED_SPACE(np->dev));
300
301         if (!send_skb)
302                 return;
303
304         send_skb->nh.raw = send_skb->data;
305         arp = (struct arphdr *) skb_put(send_skb, size);
306         send_skb->dev = skb->dev;
307         send_skb->protocol = htons(ETH_P_ARP);
308
309         /* Fill the device header for the ARP frame */
310
311         if (np->dev->hard_header &&
312             np->dev->hard_header(send_skb, skb->dev, ptype,
313                                        np->remote_mac, np->local_mac,
314                                        send_skb->len) < 0) {
315                 kfree_skb(send_skb);
316                 return;
317         }
318
319         /*
320          * Fill out the arp protocol part.
321          *
322          * we only support ethernet device type,
323          * which (according to RFC 1390) should always equal 1 (Ethernet).
324          */
325
326         arp->ar_hrd = htons(np->dev->type);
327         arp->ar_pro = htons(ETH_P_IP);
328         arp->ar_hln = np->dev->addr_len;
329         arp->ar_pln = 4;
330         arp->ar_op = htons(type);
331
332         arp_ptr=(unsigned char *)(arp + 1);
333         memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
334         arp_ptr += np->dev->addr_len;
335         memcpy(arp_ptr, &tip, 4);
336         arp_ptr += 4;
337         memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
338         arp_ptr += np->dev->addr_len;
339         memcpy(arp_ptr, &sip, 4);
340
341         netpoll_send_skb(np, send_skb);
342 }
343
344 int netpoll_rx(struct sk_buff *skb)
345 {
346         int proto, len, ulen;
347         struct iphdr *iph;
348         struct udphdr *uh;
349         struct netpoll *np;
350         struct list_head *p;
351         unsigned long flags;
352
353         if (!(skb->dev->netpoll_rx & NETPOLL_RX_ENABLED))
354                 return 1;
355
356         if (skb->dev->type != ARPHRD_ETHER)
357                 goto out;
358
359         /* check if netpoll clients need ARP */
360         if (skb->protocol == __constant_htons(ETH_P_ARP) && trapped) {
361                 arp_reply(skb);
362                 return 1;
363         }
364
365         proto = ntohs(skb->mac.ethernet->h_proto);
366         if (proto != ETH_P_IP)
367                 goto out;
368         if (skb->pkt_type == PACKET_OTHERHOST)
369                 goto out;
370         if (skb_shared(skb))
371                 goto out;
372
373         iph = (struct iphdr *)skb->data;
374         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
375                 goto out;
376         if (iph->ihl < 5 || iph->version != 4)
377                 goto out;
378         if (!pskb_may_pull(skb, iph->ihl*4))
379                 goto out;
380         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
381                 goto out;
382
383         len = ntohs(iph->tot_len);
384         if (skb->len < len || len < iph->ihl*4)
385                 goto out;
386
387         if (iph->protocol != IPPROTO_UDP)
388                 goto out;
389
390         len -= iph->ihl*4;
391         uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
392         ulen = ntohs(uh->len);
393
394         if (ulen != len)
395                 goto out;
396         if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
397                 goto out;
398
399         spin_lock_irqsave(&rx_list_lock, flags);
400         list_for_each(p, &rx_list) {
401                 np = list_entry(p, struct netpoll, rx_list);
402                 if (np->dev && np->dev != skb->dev)
403                         continue;
404                 if (np->local_ip && np->local_ip != ntohl(iph->daddr))
405                         continue;
406                 if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
407                         continue;
408                 if (np->local_port && np->local_port != ntohs(uh->dest))
409                         continue;
410
411                 spin_unlock_irqrestore(&rx_list_lock, flags);
412
413                 if (np->rx_hook)
414                         np->rx_hook(np, ntohs(uh->source),
415                                     (char *)(uh+1),
416                                     ulen - sizeof(struct udphdr));
417
418                 return 1;
419         }
420         spin_unlock_irqrestore(&rx_list_lock, flags);
421
422 out:
423         return trapped;
424 }
425
426 int netpoll_parse_options(struct netpoll *np, char *opt)
427 {
428         char *cur=opt, *delim;
429
430         if(*cur != '@') {
431                 if ((delim = strchr(cur, '@')) == NULL)
432                         goto parse_failed;
433                 *delim=0;
434                 np->local_port=simple_strtol(cur, NULL, 10);
435                 cur=delim;
436         }
437         cur++;
438         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
439
440         if(*cur != '/') {
441                 if ((delim = strchr(cur, '/')) == NULL)
442                         goto parse_failed;
443                 *delim=0;
444                 np->local_ip=ntohl(in_aton(cur));
445                 cur=delim;
446
447                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
448                        np->name, HIPQUAD(np->local_ip));
449         }
450         cur++;
451
452         if ( *cur != ',') {
453                 /* parse out dev name */
454                 if ((delim = strchr(cur, ',')) == NULL)
455                         goto parse_failed;
456                 *delim=0;
457                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
458                 cur=delim;
459         }
460         cur++;
461
462         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
463
464         if ( *cur != '@' ) {
465                 /* dst port */
466                 if ((delim = strchr(cur, '@')) == NULL)
467                         goto parse_failed;
468                 *delim=0;
469                 np->remote_port=simple_strtol(cur, NULL, 10);
470                 cur=delim;
471         }
472         cur++;
473         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
474
475         /* dst ip */
476         if ((delim = strchr(cur, '/')) == NULL)
477                 goto parse_failed;
478         *delim=0;
479         np->remote_ip=ntohl(in_aton(cur));
480         cur=delim+1;
481
482         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
483                        np->name, HIPQUAD(np->remote_ip));
484
485         if( *cur != 0 )
486         {
487                 /* MAC address */
488                 if ((delim = strchr(cur, ':')) == NULL)
489                         goto parse_failed;
490                 *delim=0;
491                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
492                 cur=delim+1;
493                 if ((delim = strchr(cur, ':')) == NULL)
494                         goto parse_failed;
495                 *delim=0;
496                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
497                 cur=delim+1;
498                 if ((delim = strchr(cur, ':')) == NULL)
499                         goto parse_failed;
500                 *delim=0;
501                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
502                 cur=delim+1;
503                 if ((delim = strchr(cur, ':')) == NULL)
504                         goto parse_failed;
505                 *delim=0;
506                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
507                 cur=delim+1;
508                 if ((delim = strchr(cur, ':')) == NULL)
509                         goto parse_failed;
510                 *delim=0;
511                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
512                 cur=delim+1;
513                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
514         }
515
516         printk(KERN_INFO "%s: remote ethernet address "
517                "%02x:%02x:%02x:%02x:%02x:%02x\n",
518                np->name,
519                np->remote_mac[0],
520                np->remote_mac[1],
521                np->remote_mac[2],
522                np->remote_mac[3],
523                np->remote_mac[4],
524                np->remote_mac[5]);
525
526         return 0;
527
528  parse_failed:
529         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
530                np->name, cur);
531         return -1;
532 }
533
534 int netpoll_setup(struct netpoll *np)
535 {
536         struct net_device *ndev = NULL;
537         struct in_device *in_dev;
538
539         if (np->dev_name)
540                 ndev = dev_get_by_name(np->dev_name);
541         if (!ndev) {
542                 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
543                        np->name, np->dev_name);
544                 return -1;
545         }
546         if (!ndev->poll_controller) {
547                 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
548                        np->name, np->dev_name);
549                 goto release;
550         }
551
552         if (!(ndev->flags & IFF_UP)) {
553                 unsigned short oflags;
554                 unsigned long atmost, atleast;
555
556                 printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
557                        np->name, np->dev_name);
558
559                 oflags = ndev->flags;
560
561                 rtnl_shlock();
562                 if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
563                         printk(KERN_ERR "%s: failed to open %s\n",
564                                np->name, np->dev_name);
565                         rtnl_shunlock();
566                         goto release;
567                 }
568                 rtnl_shunlock();
569
570                 atleast = jiffies + HZ/10;
571                 atmost = jiffies + 10*HZ;
572                 while (!netif_carrier_ok(ndev)) {
573                         if (time_after(jiffies, atmost)) {
574                                 printk(KERN_NOTICE
575                                        "%s: timeout waiting for carrier\n",
576                                        np->name);
577                                 break;
578                         }
579                         cond_resched();
580                 }
581
582                 if (time_before(jiffies, atleast)) {
583                         printk(KERN_NOTICE "%s: carrier detect appears flaky,"
584                                " waiting 10 seconds\n",
585                                np->name);
586                         while (time_before(jiffies, atmost))
587                                 cond_resched();
588                 }
589         }
590
591         if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
592                 memcpy(np->local_mac, ndev->dev_addr, 6);
593
594         if (!np->local_ip) {
595                 in_dev = in_dev_get(ndev);
596
597                 if (!in_dev) {
598                         printk(KERN_ERR "%s: no IP address for %s, aborting\n",
599                                np->name, np->dev_name);
600                         goto release;
601                 }
602
603                 np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
604                 in_dev_put(in_dev);
605                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
606                        np->name, HIPQUAD(np->local_ip));
607         }
608
609         np->dev = ndev;
610
611         if(np->rx_hook) {
612                 unsigned long flags;
613
614                 np->dev->netpoll_rx = NETPOLL_RX_ENABLED;
615
616                 spin_lock_irqsave(&rx_list_lock, flags);
617                 list_add(&np->rx_list, &rx_list);
618                 spin_unlock_irqrestore(&rx_list_lock, flags);
619         }
620
621         if(np->dump_func)
622                 netdump_func = np->dump_func;
623
624         return 0;
625  release:
626         dev_put(ndev);
627         return -1;
628 }
629
630 void netpoll_cleanup(struct netpoll *np)
631 {
632         if(np->rx_hook) {
633                 unsigned long flags;
634
635                 spin_lock_irqsave(&rx_list_lock, flags);
636                 list_del(&np->rx_list);
637                 np->dev->netpoll_rx = 0;
638                 spin_unlock_irqrestore(&rx_list_lock, flags);
639         }
640
641         dev_put(np->dev);
642         np->dev = NULL;
643 }
644
645 int netpoll_trap(void)
646 {
647         return trapped;
648 }
649
650 void netpoll_set_trap(int trap)
651 {
652         trapped = trap;
653 }
654
655 void netpoll_reset_locks(struct netpoll *np)
656 {
657         spin_lock_init(&rx_list_lock);
658         spin_lock_init(&skb_list_lock);
659         spin_lock_init(&np->dev->xmit_lock);
660 }
661
/* Public netpoll interface, listed in the order the functions are defined. */
EXPORT_SYMBOL(netpoll_poll);
EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_set_trap);
/* Lock re-initialisation is restricted to GPL modules. */
EXPORT_SYMBOL_GPL(netpoll_reset_locks);