2 * Implements the dump driver interface for saving a dump via network
5 * Some of this code has been taken/adapted from Ingo Molnar's netconsole
6 * code. LKCD team expresses its thanks to Ingo.
8 * Started: June 2002 - Mohamed Abbas <mohamed.abbas@intel.com>
9 * Adapted netconsole code to implement LKCD dump over the network.
11 * Nov 2002 - Bharata B. Rao <bharata@in.ibm.com>
12 * Innumerable code cleanups, simplification and some fixes.
13 * Netdump configuration done by ioctl instead of using module parameters.
15 * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com>
16 * Copyright (C) 2002 International Business Machines Corp.
18 * This code is released under version 2 of the GNU GPL.
23 #include <linux/delay.h>
24 #include <linux/random.h>
25 #include <linux/reboot.h>
26 #include <linux/module.h>
27 #include <linux/dump.h>
28 #include <linux/dump_netdev.h>
29 #include <linux/percpu.h>
31 #include <asm/unaligned.h>
33 static int startup_handshake;
34 static int page_counter;
35 static struct net_device *dump_ndev;
36 static struct in_device *dump_in_dev;
37 static u16 source_port, target_port;
38 static u32 source_ip, target_ip;
39 static unsigned char daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ;
40 static spinlock_t dump_skb_lock = SPIN_LOCK_UNLOCKED;
41 static int dump_nr_skbs;
42 static struct sk_buff *dump_skb;
43 static unsigned long flags_global;
44 static int netdump_in_progress;
45 static char device_name[IFNAMSIZ];
48 * security depends on the trusted path between the netconsole
49 * server and netconsole client, since none of the packets are
50 * encrypted. The random magic number protects the protocol
53 static u64 dump_magic;
55 #define MAX_UDP_CHUNK 1460
56 #define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN)
59 * We maintain a small pool of fully-sized skbs,
60 * to make sure the message gets out even in
61 * extreme OOM situations.
63 #define DUMP_MAX_SKBS 32
65 #define MAX_SKB_SIZE \
66 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
67 sizeof(struct iphdr) + sizeof(struct ethhdr))
70 dump_refill_skbs(void)
75 spin_lock_irqsave(&dump_skb_lock, flags);
76 while (dump_nr_skbs < DUMP_MAX_SKBS) {
77 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
87 spin_unlock_irqrestore(&dump_skb_lock, flags);
91 sk_buff * dump_get_skb(void)
96 spin_lock_irqsave(&dump_skb_lock, flags);
103 spin_unlock_irqrestore(&dump_skb_lock, flags);
109 * Zap completed output skbs.
112 zap_completion_queue(void)
116 struct softnet_data *sd;
119 sd = &__get_cpu_var(softnet_data);
120 if (sd->completion_queue) {
121 struct sk_buff *clist;
123 local_irq_save(flags);
124 clist = sd->completion_queue;
125 sd->completion_queue = NULL;
126 local_irq_restore(flags);
128 while (clist != NULL) {
129 struct sk_buff *skb = clist;
134 printk("Error in sk list\n");
140 dump_send_skb(struct net_device *dev, const char *msg, unsigned int msg_len,
144 int total_len, eth_len, ip_len, udp_len, count = 0;
150 udp_len = msg_len + HEADER_LEN + sizeof(*udph);
151 ip_len = eth_len = udp_len + sizeof(*iph);
152 total_len = eth_len + ETH_HLEN;
155 zap_completion_queue();
156 if (dump_nr_skbs < DUMP_MAX_SKBS)
159 skb = alloc_skb(total_len, GFP_ATOMIC);
161 skb = dump_get_skb();
164 if (once && (count == 1000000)) {
165 printk("possibly FATAL: out of netconsole "
166 "skbs!!! will keep retrying.\n");
169 dev->poll_controller(dev);
174 atomic_set(&skb->users, 1);
175 skb_reserve(skb, total_len - msg_len - HEADER_LEN);
176 skb->data[0] = NETCONSOLE_VERSION;
178 put_unaligned(htonl(reply->nr), (u32 *) (skb->data + 1));
179 put_unaligned(htonl(reply->code), (u32 *) (skb->data + 5));
180 put_unaligned(htonl(reply->info), (u32 *) (skb->data + 9));
182 memcpy(skb->data + HEADER_LEN, msg, msg_len);
183 skb->len += msg_len + HEADER_LEN;
185 udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
186 udph->source = source_port;
187 udph->dest = target_port;
188 udph->len = htons(udp_len);
191 iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
196 iph->tot_len = htons(ip_len);
200 iph->protocol = IPPROTO_UDP;
202 iph->saddr = source_ip;
203 iph->daddr = target_ip;
204 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
206 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
208 eth->h_proto = htons(ETH_P_IP);
209 memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
210 memcpy(eth->h_dest, daddr, dev->addr_len);
214 spin_lock(&dev->xmit_lock);
215 dev->xmit_lock_owner = smp_processor_id();
220 if (netif_queue_stopped(dev)) {
221 dev->xmit_lock_owner = -1;
222 spin_unlock(&dev->xmit_lock);
224 dev->poll_controller(dev);
225 zap_completion_queue();
231 dev->hard_start_xmit(skb, dev);
233 dev->xmit_lock_owner = -1;
234 spin_unlock(&dev->xmit_lock);
237 static unsigned short
238 udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr,
241 return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
245 udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
246 unsigned short ulen, u32 saddr, u32 daddr)
248 if (uh->check == 0) {
249 skb->ip_summed = CHECKSUM_UNNECESSARY;
250 } else if (skb->ip_summed == CHECKSUM_HW) {
251 skb->ip_summed = CHECKSUM_UNNECESSARY;
252 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
254 skb->ip_summed = CHECKSUM_NONE;
256 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
257 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen,
259 /* Probably, we should checksum udp header (it should be in cache
260 * in any case) and data in tiny packets (< rx copybreak).
265 static __inline__ int
266 __udp_checksum_complete(struct sk_buff *skb)
268 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len,
273 int udp_checksum_complete(struct sk_buff *skb)
275 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
276 __udp_checksum_complete(skb);
283 dump_rx_hook(struct sk_buff *skb)
288 __u32 len, saddr, daddr, ulen;
292 * First check if we are dumping or doing startup handshake, if
293 * not quickly return.
295 if (!netdump_in_progress)
296 return NET_RX_SUCCESS;
298 if (skb->dev->type != ARPHRD_ETHER)
301 proto = ntohs(skb->mac.ethernet->h_proto);
302 if (proto != ETH_P_IP)
305 if (skb->pkt_type == PACKET_OTHERHOST)
311 /* IP header correctness testing: */
312 iph = (struct iphdr *)skb->data;
313 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
316 if (iph->ihl < 5 || iph->version != 4)
319 if (!pskb_may_pull(skb, iph->ihl*4))
322 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
325 len = ntohs(iph->tot_len);
326 if (skb->len < len || len < iph->ihl*4)
331 if (iph->protocol != IPPROTO_UDP)
334 if (source_ip != daddr)
337 if (target_ip != saddr)
341 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
342 ulen = ntohs(uh->len);
344 if (ulen != len || ulen < (sizeof(*uh) + sizeof(*__req)))
347 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
350 if (udp_checksum_complete(skb))
353 if (source_port != uh->dest)
356 if (target_port != uh->source)
359 __req = (req_t *)(uh + 1);
360 if ((ntohl(__req->command) != COMM_GET_MAGIC) &&
361 (ntohl(__req->command) != COMM_HELLO) &&
362 (ntohl(__req->command) != COMM_START_WRITE_NETDUMP_ACK) &&
363 (ntohl(__req->command) != COMM_START_NETDUMP_ACK) &&
364 (memcmp(&__req->magic, &dump_magic, sizeof(dump_magic)) != 0))
367 req.magic = ntohl(__req->magic);
368 req.command = ntohl(__req->command);
369 req.from = ntohl(__req->from);
370 req.to = ntohl(__req->to);
371 req.nr = ntohl(__req->nr);
378 dump_send_mem(struct net_device *dev, req_t *req, const char* buff, size_t len)
382 int nr_chunks = len/1024;
390 for (i = 0; i < nr_chunks; i++) {
391 unsigned int offset = i*1024;
392 reply.code = REPLY_MEM;
394 dump_send_skb(dev, buff + offset, 1024, &reply);
399 * This function waits for the client to acknowledge the receipt
400 * of the netdump startup reply, with the possibility of packets
401 * getting lost. We resend the startup packet if no ACK is received,
402 * after a 1 second delay.
404 * (The client can test the success of the handshake via the HELLO
405 * command, and send ACKs until we enter netdump mode.)
408 dump_handshake(struct dump_dev *net_dev)
414 if (startup_handshake) {
415 sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n");
416 reply.code = REPLY_START_NETDUMP;
420 sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n");
421 reply.code = REPLY_START_WRITE_NETDUMP;
422 reply.nr = net_dev->curr_offset;
423 reply.info = net_dev->curr_offset;
426 /* send 300 handshake packets before declaring failure */
427 for (i = 0; i < 300; i++) {
428 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
431 for (j = 0; j < 10000; j++) {
433 dump_ndev->poll_controller(dump_ndev);
434 zap_completion_queue();
440 * if there is no new request, try sending the handshaking
447 * check if the new request is of the expected type,
448 * if so, return, else try sending the handshaking
451 if (startup_handshake) {
452 if (req.command == COMM_HELLO || req.command ==
453 COMM_START_NETDUMP_ACK) {
460 if (req.command == COMM_SEND_MEM) {
472 do_netdump(struct dump_dev *net_dev, const char* buff, size_t len)
477 int repeatCounter, counter, total_loop;
479 netdump_in_progress = 1;
481 if (dump_handshake(net_dev) < 0) {
482 printk("network dump failed due to handshake failure\n");
487 * Ideally startup handshake should be done during dump configuration,
488 * i.e., in dump_net_open(). This will be done when I figure out
489 * the dependency between startup handshake, subsequent write and
490 * various commands with respect to the net-server.
492 if (startup_handshake)
493 startup_handshake = 0;
500 dump_ndev->poll_controller(dump_ndev);
501 zap_completion_queue();
506 if (repeatCounter > 5) {
508 if (counter > 10000) {
509 if (total_loop >= 100000) {
510 printk("Time OUT LEAVE NOW\n");
514 printk("Try number %d out of "
515 "10 before Time Out\n",
528 switch (req.command) {
533 dump_send_mem(dump_ndev, &req, buff, len);
537 case COMM_START_WRITE_NETDUMP_ACK:
542 sprintf(tmp, "Hello, this is netdump version "
543 "0.%02d\n", NETCONSOLE_VERSION);
544 reply.code = REPLY_HELLO;
546 reply.info = net_dev->curr_offset;
547 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
550 case COMM_GET_PAGE_SIZE:
551 sprintf(tmp, "PAGE_SIZE: %ld\n", PAGE_SIZE);
552 reply.code = REPLY_PAGE_SIZE;
554 reply.info = PAGE_SIZE;
555 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
558 case COMM_GET_NR_PAGES:
559 reply.code = REPLY_NR_PAGES;
561 reply.info = num_physpages;
562 reply.info = page_counter;
563 sprintf(tmp, "Number of pages: %ld\n", num_physpages);
564 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
568 reply.code = REPLY_MAGIC;
570 reply.info = NETCONSOLE_VERSION;
571 dump_send_skb(dump_ndev, (char *)&dump_magic,
572 sizeof(dump_magic), &reply);
576 reply.code = REPLY_ERROR;
578 reply.info = req.command;
579 sprintf(tmp, "Got unknown command code %d!\n",
581 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
586 netdump_in_progress = 0;
591 dump_validate_config(void)
593 source_ip = dump_in_dev->ifa_list->ifa_local;
595 printk("network device %s has no local address, "
596 "aborting.\n", device_name);
600 #define IP(x) ((unsigned char *)&source_ip)[x]
601 printk("Source %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3));
605 printk("source_port parameter not specified, aborting.\n");
608 printk(":%i\n", source_port);
609 source_port = htons(source_port);
612 printk("target_ip parameter not specified, aborting.\n");
616 #define IP(x) ((unsigned char *)&target_ip)[x]
617 printk("Target %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3));
621 printk("target_port parameter not specified, aborting.\n");
624 printk(":%i\n", target_port);
625 target_port = htons(target_port);
627 printk("Target Ethernet Address %02x:%02x:%02x:%02x:%02x:%02x",
628 daddr[0], daddr[1], daddr[2], daddr[3], daddr[4], daddr[5]);
630 if ((daddr[0] & daddr[1] & daddr[2] & daddr[3] & daddr[4] &
632 printk("(Broadcast)");
638 * Prepares the dump device so we can take a dump later.
639 * Validates the netdump configuration parameters.
641 * TODO: Network connectivity check should be done here.
644 dump_net_open(struct dump_dev *net_dev, unsigned long arg)
648 /* get the interface name */
649 if (copy_from_user(device_name, (void *)arg, IFNAMSIZ))
652 if (!(dump_ndev = dev_get_by_name(device_name))) {
653 printk("network device %s does not exist, aborting.\n",
658 if (!dump_ndev->poll_controller) {
659 printk("network device %s does not implement polling yet, "
660 "aborting.\n", device_name);
661 retval = -1; /* return proper error */
665 if (!(dump_in_dev = in_dev_get(dump_ndev))) {
666 printk("network device %s is not an IP protocol device, "
667 "aborting.\n", device_name);
672 if ((retval = dump_validate_config()) < 0)
675 net_dev->curr_offset = 0;
676 printk("Network device %s successfully configured for dumping\n",
680 in_dev_put(dump_in_dev);
687 * Close the dump device and release associated resources
688 * Invoked when unconfiguring the dump device.
691 dump_net_release(struct dump_dev *net_dev)
694 in_dev_put(dump_in_dev);
701 * Prepare the dump device for use (silence any ongoing activity
702 * and quiesce state) when the system crashes.
705 dump_net_silence(struct dump_dev *net_dev)
708 local_irq_save(flags_global);
709 dump_ndev->rx_hook = dump_rx_hook;
710 startup_handshake = 1;
711 net_dev->curr_offset = 0;
712 printk("Dumping to network device %s on CPU %d ...\n", device_name,
718 * Invoked when dumping is done. This is the time to put things back
719 * (i.e. undo the effects of dump_block_silence) so the device is
720 * available for normal use.
723 dump_net_resume(struct dump_dev *net_dev)
732 sprintf(tmp, "NETDUMP end.\n");
733 for( indx = 0; indx < 6; indx++) {
734 reply.code = REPLY_END_NETDUMP;
737 dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
739 printk("NETDUMP END!\n");
740 local_irq_restore(flags_global);
742 dump_ndev->rx_hook = NULL;
743 startup_handshake = 0;
748 * Seek to the specified offset in the dump device.
749 * Makes sure this is a valid offset, otherwise returns an error.
752 dump_net_seek(struct dump_dev *net_dev, loff_t off)
755 * For now using DUMP_HEADER_OFFSET as hard coded value,
756 * See dump_block_seek in dump_blockdev.c to know how to
759 net_dev->curr_offset = off;
767 dump_net_write(struct dump_dev *net_dev, void *buf, unsigned long len)
772 cnt = len/ PAGE_SIZE;
774 for (i = 0; i < cnt; i++) {
776 ret = do_netdump(net_dev, buf+off, PAGE_SIZE);
779 net_dev->curr_offset = net_dev->curr_offset + PAGE_SIZE;
785 * check if the last dump i/o is over and ready for next request
788 dump_net_ready(struct dump_dev *net_dev, void *buf)
794 * ioctl function used for configuring network dump
797 dump_net_ioctl(struct dump_dev *net_dev, unsigned int cmd, unsigned long arg)
804 target_port = (u16)arg;
807 source_port = (u16)arg;
810 return copy_from_user(daddr, (void *)arg, 6);
823 struct dump_dev_ops dump_netdev_ops = {
824 .open = dump_net_open,
825 .release = dump_net_release,
826 .silence = dump_net_silence,
827 .resume = dump_net_resume,
828 .seek = dump_net_seek,
829 .write = dump_net_write,
830 /* .read not implemented */
831 .ready = dump_net_ready,
832 .ioctl = dump_net_ioctl
835 static struct dump_dev default_dump_netdev = {
836 .type_name = "networkdev",
837 .ops = &dump_netdev_ops,
842 dump_netdev_init(void)
844 default_dump_netdev.curr_offset = 0;
846 if (dump_register_device(&default_dump_netdev) < 0) {
847 printk("network dump device driver registration failed\n");
850 printk("network device driver for LKCD registered\n");
852 get_random_bytes(&dump_magic, sizeof(dump_magic));
857 dump_netdev_cleanup(void)
859 dump_unregister_device(&default_dump_netdev);
862 MODULE_AUTHOR("LKCD Development Team <lkcd-devel@lists.sourceforge.net>");
863 MODULE_DESCRIPTION("Network Dump Driver for Linux Kernel Crash Dump (LKCD)");
864 MODULE_LICENSE("GPL");
866 module_init(dump_netdev_init);
867 module_exit(dump_netdev_cleanup);