vserver 1.9.3
[linux-2.6.git] / net / core / netpoll.c
1 /*
2  * Common framework for low-level network console, dump, and debugger code
3  *
4  * Sep 8 2003  Matt Mackall <mpm@selenic.com>
5  *
6  * based on the netconsole code from:
7  *
8  * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
9  * Copyright (C) 2002  Red Hat, Inc.
10  */
11
12 #include <linux/smp_lock.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/string.h>
16 #include <linux/inetdevice.h>
17 #include <linux/inet.h>
18 #include <linux/interrupt.h>
19 #include <linux/netpoll.h>
20 #include <linux/sched.h>
21 #include <linux/rcupdate.h>
22 #include <net/tcp.h>
23 #include <net/udp.h>
24 #include <asm/unaligned.h>
25
26 /*
27  * We maintain a small pool of fully-sized skbs, to make sure the
28  * message gets out even in extreme OOM situations.
29  */
30
31 #define MAX_SKBS 32
32 #define MAX_UDP_CHUNK 1460
33
34 static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
35 static int nr_skbs;
36 static struct sk_buff *skbs;
37
38 static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
39 static LIST_HEAD(rx_list);
40
41 static atomic_t trapped;
42 spinlock_t netpoll_poll_lock = SPIN_LOCK_UNLOCKED;
43
44 #define NETPOLL_RX_ENABLED  1
45 #define NETPOLL_RX_DROP     2
46
47 #define MAX_SKB_SIZE \
48                 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
49                                 sizeof(struct iphdr) + sizeof(struct ethhdr))
50
51 static void zap_completion_queue(void);
52
53 static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
54                              unsigned short ulen, u32 saddr, u32 daddr)
55 {
56         if (uh->check == 0)
57                 return 0;
58
59         if (skb->ip_summed == CHECKSUM_HW)
60                 return csum_tcpudp_magic(
61                         saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
62
63         skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
64
65         return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
66 }
67
/*
 * netpoll_poll - service the device by hand, outside interrupt delivery.
 *
 * Runs the driver's ->poll_controller() so rx/tx work is processed even
 * when interrupts cannot fire.  If NAPI receive is currently scheduled,
 * the driver's ->poll() is also driven directly, with "trapped" raised
 * so netpoll_rx() consumes the packets instead of passing them up the
 * regular stack.  Finishes by reaping this CPU's tx completion queue.
 */
void netpoll_poll(struct netpoll *np)
{
        /*
         * In cases where there is bi-directional communications, reading
         * only one message at a time can lead to packets being dropped by
         * the network adapter, forcing superfluous retries and possibly
         * timeouts.  Thus, we set our budget to a more reasonable value.
         */
        int budget = 16;
        unsigned long flags;

        /* Nothing to do without a running device that supports polling. */
        if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
                return;

        /* Process pending work on NIC */
        np->dev->poll_controller(np->dev);

        /* If scheduling is stopped, tickle NAPI bits */
        spin_lock_irqsave(&netpoll_poll_lock, flags);
        if (np->dev->poll &&
            test_bit(__LINK_STATE_RX_SCHED, &np->dev->state)) {
                /* Trap packets received while we drive ->poll() by hand. */
                np->dev->netpoll_rx |= NETPOLL_RX_DROP;
                atomic_inc(&trapped);

                np->dev->poll(np->dev, &budget);

                atomic_dec(&trapped);
                np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
        }
        spin_unlock_irqrestore(&netpoll_poll_lock, flags);

        /* Free skbs parked on this CPU's tx completion queue. */
        zap_completion_queue();
}
101
102 static void refill_skbs(void)
103 {
104         struct sk_buff *skb;
105         unsigned long flags;
106
107         spin_lock_irqsave(&skb_list_lock, flags);
108         while (nr_skbs < MAX_SKBS) {
109                 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
110                 if (!skb)
111                         break;
112
113                 skb->next = skbs;
114                 skbs = skb;
115                 nr_skbs++;
116         }
117         spin_unlock_irqrestore(&skb_list_lock, flags);
118 }
119
/*
 * Free every skb parked on this CPU's softnet completion queue.
 *
 * dev_kfree_skb_irq() defers the real free to the NET_TX softirq, but
 * softirqs may never get a chance to run while netpoll is driving the
 * device with interrupts off -- so reap the queue by hand here to keep
 * transmitted skbs from piling up.
 */
static void zap_completion_queue(void)
{
        unsigned long flags;
        struct softnet_data *sd = &get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                /* Detach the whole list atomically w.r.t. local irqs. */
                local_irq_save(flags);
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_restore(flags);

                /* Free the detached skbs with irqs enabled again. */
                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
                        __kfree_skb(skb);
                }
        }

        put_cpu_var(softnet_data);
}
142
143 static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
144 {
145         int once = 1, count = 0;
146         unsigned long flags;
147         struct sk_buff *skb = NULL;
148
149         zap_completion_queue();
150 repeat:
151         if (nr_skbs < MAX_SKBS)
152                 refill_skbs();
153
154         skb = alloc_skb(len, GFP_ATOMIC);
155
156         if (!skb) {
157                 spin_lock_irqsave(&skb_list_lock, flags);
158                 skb = skbs;
159                 if (skb)
160                         skbs = skb->next;
161                 skb->next = NULL;
162                 nr_skbs--;
163                 spin_unlock_irqrestore(&skb_list_lock, flags);
164         }
165
166         if(!skb) {
167                 count++;
168                 if (once && (count == 1000000)) {
169                         printk("out of netpoll skbs!\n");
170                         once = 0;
171                 }
172                 netpoll_poll(np);
173                 goto repeat;
174         }
175
176         atomic_set(&skb->users, 1);
177         skb_reserve(skb, reserve);
178         return skb;
179 }
180
/*
 * netpoll_send_skb - transmit @skb on @np's device, busy-waiting as needed.
 *
 * Bypasses the qdisc layer: takes the device xmit lock directly and
 * calls ->hard_start_xmit().  If the tx queue is stopped or the driver
 * reports busy, the device is polled (to reap completions) and the
 * transmit is retried until it succeeds.  Consumes @skb; drops it only
 * when the device is gone or not running.
 */
void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
        int status;

repeat:
        /* Device gone or down: drop the packet. */
        if(!np || !np->dev || !netif_running(np->dev)) {
                __kfree_skb(skb);
                return;
        }

        spin_lock(&np->dev->xmit_lock);
        np->dev->xmit_lock_owner = smp_processor_id();

        /*
         * network drivers do not expect to be called if the queue is
         * stopped.
         */
        if (netif_queue_stopped(np->dev)) {
                np->dev->xmit_lock_owner = -1;
                spin_unlock(&np->dev->xmit_lock);

                /* Poll until the driver wakes the queue, then retry. */
                netpoll_poll(np);
                goto repeat;
        }

        status = np->dev->hard_start_xmit(skb, np->dev);
        np->dev->xmit_lock_owner = -1;
        spin_unlock(&np->dev->xmit_lock);

        /* transmit busy */
        if(status) {
                netpoll_poll(np);
                goto repeat;
        }
}
216
/*
 * netpoll_send_udp - build and transmit one UDP datagram carrying @msg.
 *
 * Constructs the Ethernet, IPv4 and UDP headers by hand (no sockets, no
 * routing) from the addresses configured in @np and transmits the frame
 * via netpoll_send_skb().  The UDP checksum is left zero, i.e. disabled.
 * @len is expected to fit in one MAX_SKB_SIZE pool buffer, i.e. at most
 * MAX_UDP_CHUNK bytes.
 */
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
        int total_len, eth_len, ip_len, udp_len;
        struct sk_buff *skb;
        struct udphdr *udph;
        struct iphdr *iph;
        struct ethhdr *eth;

        /* Header sizes stack up: payload -> UDP -> IP -> Ethernet. */
        udp_len = len + sizeof(*udph);
        ip_len = eth_len = udp_len + sizeof(*iph);
        total_len = eth_len + ETH_HLEN;

        /* Reserve room in front of the payload for all three headers. */
        skb = find_skb(np, total_len, total_len - len);
        if (!skb)
                return;

        memcpy(skb->data, msg, len);
        skb->len += len;

        /* UDP header, pushed in front of the payload. */
        udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);
        udph->check = 0;

        /* IPv4 header.  put_unaligned() is used because the header may
         * not be naturally aligned after the pushes. */
        iph = (struct iphdr *)skb_push(skb, sizeof(*iph));

        /* iph->version = 4; iph->ihl = 5; */
        put_unaligned(0x45, (unsigned char *)iph);
        iph->tos      = 0;
        put_unaligned(htons(ip_len), &(iph->tot_len));
        iph->id       = 0;
        iph->frag_off = 0;
        iph->ttl      = 64;
        iph->protocol = IPPROTO_UDP;
        iph->check    = 0;
        put_unaligned(htonl(np->local_ip), &(iph->saddr));
        put_unaligned(htonl(np->remote_ip), &(iph->daddr));
        iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

        /* Finally the Ethernet header with the configured MACs. */
        eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);

        eth->h_proto = htons(ETH_P_IP);
        memcpy(eth->h_source, np->local_mac, 6);
        memcpy(eth->h_dest, np->remote_mac, 6);

        netpoll_send_skb(np, skb);
}
265
/*
 * arp_reply - answer an ARP request aimed at a netpoll client address.
 *
 * Called from netpoll_rx() while packets are trapped; the normal ARP
 * machinery cannot run in that state, so the ARPOP_REPLY frame is
 * assembled by hand and pushed out through netpoll_send_skb().  Only
 * Ethernet-style IPv4 requests whose target IP matches the netpoll
 * instance bound to the receiving device are answered.
 */
static void arp_reply(struct sk_buff *skb)
{
        struct arphdr *arp;
        unsigned char *arp_ptr;
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        u32 sip, tip;
        struct sk_buff *send_skb;
        unsigned long flags;
        struct list_head *p;
        struct netpoll *np = NULL;

        /* Find the netpoll instance bound to the receiving device. */
        spin_lock_irqsave(&rx_list_lock, flags);
        list_for_each(p, &rx_list) {
                np = list_entry(p, struct netpoll, rx_list);
                if ( np->dev == skb->dev )
                        break;
                np = NULL;
        }
        spin_unlock_irqrestore(&rx_list_lock, flags);

        if (!np) return;

        /* No arp on this interface */
        if (skb->dev->flags & IFF_NOARP)
                return;

        /* Make sure the whole request (header + two hw addrs + two IPv4
         * addrs) is in linear skb data before touching it. */
        if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
                                 (2 * skb->dev->addr_len) +
                                 (2 * sizeof(u32)))))
                return;

        skb->h.raw = skb->nh.raw = skb->data;
        arp = skb->nh.arph;

        /* Only Ethernet/802 ARP requests for IPv4 are answered. */
        if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
             arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
            arp->ar_pro != htons(ETH_P_IP) ||
            arp->ar_op != htons(ARPOP_REQUEST))
                return;

        /* Extract sender IP (sip) and target IP (tip) from the request. */
        arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
        memcpy(&sip, arp_ptr, 4);
        arp_ptr += 4 + skb->dev->addr_len;
        memcpy(&tip, arp_ptr, 4);

        /* Should we ignore arp? */
        if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
                return;

        size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
        send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
                            LL_RESERVED_SPACE(np->dev));

        if (!send_skb)
                return;

        send_skb->nh.raw = send_skb->data;
        arp = (struct arphdr *) skb_put(send_skb, size);
        send_skb->dev = skb->dev;
        send_skb->protocol = htons(ETH_P_ARP);

        /* Fill the device header for the ARP frame */

        if (np->dev->hard_header &&
            np->dev->hard_header(send_skb, skb->dev, ptype,
                                       np->remote_mac, np->local_mac,
                                       send_skb->len) < 0) {
                kfree_skb(send_skb);
                return;
        }

        /*
         * Fill out the arp protocol part.
         *
         * we only support ethernet device type,
         * which (according to RFC 1390) should always equal 1 (Ethernet).
         */

        arp->ar_hrd = htons(np->dev->type);
        arp->ar_pro = htons(ETH_P_IP);
        arp->ar_hln = np->dev->addr_len;
        arp->ar_pln = 4;
        arp->ar_op = htons(type);

        /* Sender = our hw addr + the requested IP (tip == our local IP).
         * NOTE(review): the target hw addr written here is np->remote_mac,
         * not the requester's MAC taken from the packet -- presumably fine
         * under netpoll's single-configured-peer assumption; confirm. */
        arp_ptr=(unsigned char *)(arp + 1);
        memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
        arp_ptr += np->dev->addr_len;
        memcpy(arp_ptr, &tip, 4);
        arp_ptr += 4;
        memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
        arp_ptr += np->dev->addr_len;
        memcpy(arp_ptr, &sip, 4);

        netpoll_send_skb(np, send_skb);
}
361
/*
 * netpoll_rx - inspect an incoming packet on behalf of netpoll clients.
 *
 * Returns 1 when the packet was consumed here (an ARP request answered
 * while trapped, or a UDP datagram delivered to a matching client's
 * rx_hook).  Otherwise returns the current "trapped" count, so any
 * non-zero return means "do not pass this skb up the regular stack".
 */
int netpoll_rx(struct sk_buff *skb)
{
        int proto, len, ulen;
        struct iphdr *iph;
        struct udphdr *uh;
        struct netpoll *np;
        struct list_head *p;
        unsigned long flags;

        if (skb->dev->type != ARPHRD_ETHER)
                goto out;

        /* check if netpoll clients need ARP */
        if (skb->protocol == __constant_htons(ETH_P_ARP) &&
            atomic_read(&trapped)) {
                arp_reply(skb);
                return 1;
        }

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto != ETH_P_IP)
                goto out;
        if (skb->pkt_type == PACKET_OTHERHOST)
                goto out;
        if (skb_shared(skb))
                goto out;

        /* NOTE(review): iph is captured before pskb_may_pull(); the pull
         * can relocate skb->data for nonlinear skbs, which would leave
         * iph stale -- confirm skbs reaching here are always linear. */
        iph = (struct iphdr *)skb->data;
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto out;
        /* Basic IPv4 sanity: header length, version, header checksum. */
        if (iph->ihl < 5 || iph->version != 4)
                goto out;
        if (!pskb_may_pull(skb, iph->ihl*4))
                goto out;
        if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                goto out;

        len = ntohs(iph->tot_len);
        if (skb->len < len || len < iph->ihl*4)
                goto out;

        if (iph->protocol != IPPROTO_UDP)
                goto out;

        len -= iph->ihl*4;
        uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
        ulen = ntohs(uh->len);

        if (ulen != len)
                goto out;
        /* NOTE(review): checksum_udp() returns a folded, non-negative
         * value, so this < 0 test can never reject a packet -- verify
         * whether bad UDP checksums are actually meant to be dropped. */
        if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
                goto out;

        /* Match the datagram against registered netpoll clients; a zero
         * field in the client config acts as a wildcard. */
        spin_lock_irqsave(&rx_list_lock, flags);
        list_for_each(p, &rx_list) {
                np = list_entry(p, struct netpoll, rx_list);
                if (np->dev && np->dev != skb->dev)
                        continue;
                if (np->local_ip && np->local_ip != ntohl(iph->daddr))
                        continue;
                if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
                        continue;
                if (np->local_port && np->local_port != ntohs(uh->dest))
                        continue;

                /* Drop the lock before calling into the client hook. */
                spin_unlock_irqrestore(&rx_list_lock, flags);

                if (np->rx_hook)
                        np->rx_hook(np, ntohs(uh->source),
                                    (char *)(uh+1),
                                    ulen - sizeof(struct udphdr));

                return 1;
        }
        spin_unlock_irqrestore(&rx_list_lock, flags);

out:
        return atomic_read(&trapped);
}
441
442 int netpoll_parse_options(struct netpoll *np, char *opt)
443 {
444         char *cur=opt, *delim;
445
446         if(*cur != '@') {
447                 if ((delim = strchr(cur, '@')) == NULL)
448                         goto parse_failed;
449                 *delim=0;
450                 np->local_port=simple_strtol(cur, NULL, 10);
451                 cur=delim;
452         }
453         cur++;
454         printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
455
456         if(*cur != '/') {
457                 if ((delim = strchr(cur, '/')) == NULL)
458                         goto parse_failed;
459                 *delim=0;
460                 np->local_ip=ntohl(in_aton(cur));
461                 cur=delim;
462
463                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
464                        np->name, HIPQUAD(np->local_ip));
465         }
466         cur++;
467
468         if ( *cur != ',') {
469                 /* parse out dev name */
470                 if ((delim = strchr(cur, ',')) == NULL)
471                         goto parse_failed;
472                 *delim=0;
473                 strlcpy(np->dev_name, cur, sizeof(np->dev_name));
474                 cur=delim;
475         }
476         cur++;
477
478         printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
479
480         if ( *cur != '@' ) {
481                 /* dst port */
482                 if ((delim = strchr(cur, '@')) == NULL)
483                         goto parse_failed;
484                 *delim=0;
485                 np->remote_port=simple_strtol(cur, NULL, 10);
486                 cur=delim;
487         }
488         cur++;
489         printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
490
491         /* dst ip */
492         if ((delim = strchr(cur, '/')) == NULL)
493                 goto parse_failed;
494         *delim=0;
495         np->remote_ip=ntohl(in_aton(cur));
496         cur=delim+1;
497
498         printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
499                        np->name, HIPQUAD(np->remote_ip));
500
501         if( *cur != 0 )
502         {
503                 /* MAC address */
504                 if ((delim = strchr(cur, ':')) == NULL)
505                         goto parse_failed;
506                 *delim=0;
507                 np->remote_mac[0]=simple_strtol(cur, NULL, 16);
508                 cur=delim+1;
509                 if ((delim = strchr(cur, ':')) == NULL)
510                         goto parse_failed;
511                 *delim=0;
512                 np->remote_mac[1]=simple_strtol(cur, NULL, 16);
513                 cur=delim+1;
514                 if ((delim = strchr(cur, ':')) == NULL)
515                         goto parse_failed;
516                 *delim=0;
517                 np->remote_mac[2]=simple_strtol(cur, NULL, 16);
518                 cur=delim+1;
519                 if ((delim = strchr(cur, ':')) == NULL)
520                         goto parse_failed;
521                 *delim=0;
522                 np->remote_mac[3]=simple_strtol(cur, NULL, 16);
523                 cur=delim+1;
524                 if ((delim = strchr(cur, ':')) == NULL)
525                         goto parse_failed;
526                 *delim=0;
527                 np->remote_mac[4]=simple_strtol(cur, NULL, 16);
528                 cur=delim+1;
529                 np->remote_mac[5]=simple_strtol(cur, NULL, 16);
530         }
531
532         printk(KERN_INFO "%s: remote ethernet address "
533                "%02x:%02x:%02x:%02x:%02x:%02x\n",
534                np->name,
535                np->remote_mac[0],
536                np->remote_mac[1],
537                np->remote_mac[2],
538                np->remote_mac[3],
539                np->remote_mac[4],
540                np->remote_mac[5]);
541
542         return 0;
543
544  parse_failed:
545         printk(KERN_INFO "%s: couldn't parse config at %s!\n",
546                np->name, cur);
547         return -1;
548 }
549
550 int netpoll_setup(struct netpoll *np)
551 {
552         struct net_device *ndev = NULL;
553         struct in_device *in_dev;
554
555         if (np->dev_name)
556                 ndev = dev_get_by_name(np->dev_name);
557         if (!ndev) {
558                 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
559                        np->name, np->dev_name);
560                 return -1;
561         }
562         if (!ndev->poll_controller) {
563                 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
564                        np->name, np->dev_name);
565                 goto release;
566         }
567
568         if (!(ndev->flags & IFF_UP)) {
569                 unsigned short oflags;
570                 unsigned long atmost, atleast;
571
572                 printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
573                        np->name, np->dev_name);
574
575                 oflags = ndev->flags;
576
577                 rtnl_shlock();
578                 if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
579                         printk(KERN_ERR "%s: failed to open %s\n",
580                                np->name, np->dev_name);
581                         rtnl_shunlock();
582                         goto release;
583                 }
584                 rtnl_shunlock();
585
586                 atleast = jiffies + HZ/10;
587                 atmost = jiffies + 10*HZ;
588                 while (!netif_carrier_ok(ndev)) {
589                         if (time_after(jiffies, atmost)) {
590                                 printk(KERN_NOTICE
591                                        "%s: timeout waiting for carrier\n",
592                                        np->name);
593                                 break;
594                         }
595                         cond_resched();
596                 }
597
598                 if (time_before(jiffies, atleast)) {
599                         printk(KERN_NOTICE "%s: carrier detect appears flaky,"
600                                " waiting 10 seconds\n",
601                                np->name);
602                         while (time_before(jiffies, atmost))
603                                 cond_resched();
604                 }
605         }
606
607         if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
608                 memcpy(np->local_mac, ndev->dev_addr, 6);
609
610         if (!np->local_ip) {
611                 rcu_read_lock();
612                 in_dev = __in_dev_get(ndev);
613
614                 if (!in_dev) {
615                         rcu_read_unlock();
616                         printk(KERN_ERR "%s: no IP address for %s, aborting\n",
617                                np->name, np->dev_name);
618                         goto release;
619                 }
620
621                 np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
622                 rcu_read_unlock();
623                 printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
624                        np->name, HIPQUAD(np->local_ip));
625         }
626
627         np->dev = ndev;
628
629         if(np->rx_hook) {
630                 unsigned long flags;
631
632                 np->dev->netpoll_rx = NETPOLL_RX_ENABLED;
633
634                 spin_lock_irqsave(&rx_list_lock, flags);
635                 list_add(&np->rx_list, &rx_list);
636                 spin_unlock_irqrestore(&rx_list_lock, flags);
637         }
638
639         return 0;
640  release:
641         dev_put(ndev);
642         return -1;
643 }
644
645 void netpoll_cleanup(struct netpoll *np)
646 {
647         if (np->rx_hook) {
648                 unsigned long flags;
649
650                 spin_lock_irqsave(&rx_list_lock, flags);
651                 list_del(&np->rx_list);
652                 spin_unlock_irqrestore(&rx_list_lock, flags);
653         }
654
655         if (np->dev)
656                 np->dev->netpoll_rx = 0;
657         dev_put(np->dev);
658         np->dev = NULL;
659 }
660
/*
 * netpoll_trap - report whether netpoll is currently trapping packets.
 *
 * Non-zero while one or more raises are outstanding (netpoll_set_trap)
 * or while netpoll_poll() is driving ->poll() by hand.
 */
int netpoll_trap(void)
{
        return atomic_read(&trapped);
}
665
/*
 * netpoll_set_trap - raise (@trap != 0) or lower (@trap == 0) the
 * packet trap.  Calls nest: every raise must be balanced by a lower.
 */
void netpoll_set_trap(int trap)
{
        if (trap)
                atomic_inc(&trapped);
        else
                atomic_dec(&trapped);
}
673
/* Public netpoll API, used by netconsole and other polling clients. */
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);